<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mol. Biosci.</journal-id>
<journal-title>Frontiers in Molecular Biosciences</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mol. Biosci.</abbrev-journal-title>
<issn pub-type="epub">2296-889X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">839051</article-id>
<article-id pub-id-type="doi">10.3389/fmolb.2022.839051</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Molecular Biosciences</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Assessment of Greenhouse Tomato Anthesis Rate Through Metabolomics Using LASSO Regularized Linear Regression Model</article-title>
<alt-title alt-title-type="left-running-head">Siriwach et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">LASSO on Tomato Anthesis Rate</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Siriwach</surname>
<given-names>Ratklao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1604964/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Matsuzaki</surname>
<given-names>Jun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Saito</surname>
<given-names>Takeshi</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nishimura</surname>
<given-names>Hiroshi</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Isozaki</surname>
<given-names>Masahide</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Isoyama</surname>
<given-names>Yosuke</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sato</surname>
<given-names>Muneo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1286592/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Arita</surname>
<given-names>Masanori</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/13070/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Akaho</surname>
<given-names>Shotaro</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1677747/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Higashide</surname>
<given-names>Tadahisa</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yano</surname>
<given-names>Kentaro</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Hirai</surname>
<given-names>Masami Yokota</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/27997/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>RIKEN Center for Sustainable Resource Science</institution>, <addr-line>Yokohama</addr-line>, <country>Japan</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Institute of Vegetable and Floriculture Science, NARO</institution>, <addr-line>Tsukuba</addr-line>, <country>Japan</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Mie Prefecture Agricultural Research Institute</institution>, <addr-line>Matsusaka</addr-line>, <country>Japan</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>National Institute of Genetics</institution>, <addr-line>Mishima</addr-line>, <country>Japan</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>National Institute of Advanced Industrial Science and Technology</institution>, <addr-line>Tsukuba</addr-line>, <country>Japan</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Bioinformatics Laboratory</institution>, <institution>Department of Life Sciences</institution>, <institution>School of Agriculture</institution>, <institution>Meiji University</institution>, <addr-line>Kawasaki</addr-line>, <country>Japan</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/13413/overview">Wolfram Weckwerth</ext-link>, University of Vienna, Austria</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/378658/overview">Jos&#xe9; Juan Ordaz-Ortiz</ext-link>, Instituto Polit&#xe9;cnico Nacional de M&#xe9;xico (CINVESTAV), Mexico</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1302725/overview">Zhongda Zeng</ext-link>, Dalian University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Masami Yokota Hirai, <email>masami.hirai@riken.jp</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this&#x20;work</p>
</fn>
<fn fn-type="other">
<p>This article was submitted to Metabolomics, a section of the journal Frontiers in Molecular Biosciences</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>01</day>
<month>03</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>9</volume>
<elocation-id>839051</elocation-id>
<history>
<date date-type="received">
<day>19</day>
<month>12</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>03</day>
<month>02</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Siriwach, Matsuzaki, Saito, Nishimura, Isozaki, Isoyama, Sato, Arita, Akaho, Higashide, Yano and Hirai.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Siriwach, Matsuzaki, Saito, Nishimura, Isozaki, Isoyama, Sato, Arita, Akaho, Higashide, Yano and Hirai</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<abstract>
<p>While the high year-round production of tomatoes has been facilitated by solar greenhouse cultivation, these yields readily fluctuate in response to changing environmental conditions. Mathematical modeling has been applied to forecast phenotypes of tomatoes using environmental measurements (e.g., temperature) as indirect parameters. In this study, metabolome data, as direct parameters reflecting plant internal status, were used to construct a predictive model of the anthesis rate of greenhouse tomatoes. Metabolome data were obtained from tomato leaves and used as variables for linear regression with the least absolute shrinkage and selection operator (LASSO) for prediction. The constructed model accurately predicted the anthesis rate, with an R<sup>2</sup> value of 0.85. Twenty-nine of the 161 metabolites were selected as candidate markers. The selected metabolites were further validated for their association with anthesis rates using the different metabolome datasets. To assess the importance of the selected metabolites in cultivation, the relationships between the metabolites and cultivation conditions were analyzed <italic>via</italic> correspondence analysis. Trigonelline, whose content did not exhibit a diurnal rhythm, displayed major contributions to the cultivation, and is thus a potential metabolic marker for predicting the anthesis rate. This study demonstrates that machine learning can be applied to metabolome data to identify metabolites indicative of agricultural traits.</p>
</abstract>
<kwd-group>
<kwd>metabolome</kwd>
<kwd>metabolites</kwd>
<kwd>tomato</kwd>
<kwd>anthesis rate</kwd>
<kwd>machine learning</kwd>
<kwd>LASSO</kwd>
<kwd>trigonelline</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Tomatoes (<italic>Solanum lycopersicum</italic> L.) are produced worldwide, with the highest rates of production among non-grain crops after potatoes (<xref ref-type="bibr" rid="B8">FAOSTAT, 2018</xref>). The high year-round production of tomato fruits has been facilitated by greenhouse cultivation in many countries. Greenhouse cultivation provides the optimal environmental conditions, such as temperature, humidity, and light conditions, needed to grow plants (<xref ref-type="bibr" rid="B25">Peet and Welles, 2005</xref>). However, in addition to the automatic control of environmental conditions, prompt treatment by tomato growers is necessary to mitigate the effects of extreme weather conditions. For example, extreme heat causes pre-harvest physiological disorders, resulting in fruit cracking and blossom drop in tomato plants. For such extreme heat, temporary equipment and/or manual control is required to lower the temperature in the greenhouse (<xref ref-type="bibr" rid="B19">Liebisch et&#x20;al., 2009</xref>; <xref ref-type="bibr" rid="B30">Saure, 2014</xref>). Therefore, for greenhouse cultivation, there is a need to continuously and adequately manage the environmental conditions inside greenhouses. Moreover, the morphological or physiological status of tomato plants can be used to infer subsequent plant growth and outcome (crop harvest). This means that more favorable growth conditions could be investigated and elucidated to enhance plant growth and maximize tomato fruit production. At present, tomato growers empirically control the growth conditions in greenhouses according to extreme weather conditions and plant&#x20;vigor.</p>
<p>Recently, omics data have been utilized in phenotype prediction and the identification of genes that control traits of interest. Among the omics data, gene expression data have been employed, as gene expression profiles can be easily collected by microarray experiments or sequencing technologies (<xref ref-type="bibr" rid="B40">Yamamoto et&#x20;al., 2016</xref>; <xref ref-type="bibr" rid="B9">Gao et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B18">Liabeuf et&#x20;al., 2018</xref>). <xref ref-type="bibr" rid="B41">Yano et&#x20;al. (2006)</xref> introduced an accurate prediction method for phenotypes with comprehensive gene expression profiles using a model on a statistical index and correspondence analysis (CA). In addition to transcriptome analysis, comprehensive metabolite profiles (patterns of metabolite contents across a wide range of experimental conditions) have also become practical with high-throughput mass spectrometry-based technologies. Since metabolites are directly related to phenotypes rather than events of gene expression, phenotype prediction using metabolome data is a promising strategy with which to considerably improve predictability.</p>
<p>There are both direct and indirect approaches to the omics analysis of a target trait. Omics data (e.g., gene expression and/or metabolic profiles) obtained from a given organ represent the genetic and physiological status of the same organ. Therefore, omics data are directly available to identify genes and/or metabolites controlling a given trait in an organ. For example, omics data from the fruit of tomato plants rather than other organs (e.g., leaves) are suitable for the detection of genes and metabolites that play a key role in fruit development. However, the direct approach is unfavorable because for the collection of omics data, fruits need to be removed from the plant. To maximize the quantity of fruit production in the greenhouse, it is better to use vegetative organs, such as leaves, rather than the fruit, for the collection of omics data. If omics data from vegetative organs are able to accurately represent the status of tomato fruit, the indirect approach could also prove to be effective and efficient for the identification of genes and metabolites for a trait, as well as for phenotype prediction.</p>
<p>The metabolic profiling of vegetative organs has been reported to be highly correlated with the quantity of tomato fruit produced. For example, the association between vegetative and reproductive growth of greenhouse tomatoes has been studied for a long time (<xref ref-type="bibr" rid="B15">Khan and Sagar, 1969</xref>; <xref ref-type="bibr" rid="B35">Tanaka and Fujita, 1974</xref>). The allocation of assimilated carbon between vegetative organs (leaves) and reproductive organs (flowers and fruits) is controlled by genetic and environmental factors, such as light intensity and temperature (<xref ref-type="bibr" rid="B6">Dinar and Rudich, 1985</xref>; <xref ref-type="bibr" rid="B13">Heuvelink and Buiskool, 1995</xref>). Previous studies have also suggested that the metabolic profiles of vegetative organs, rather than reproductive organs, are attractive and suitable for the construction of a prediction model for fruit&#x20;yield.</p>
<p>When the metabolic profiles in a vegetative organ are effective in accurately predicting fruit yield, the profiles of a metabolite(s) must be strongly associated with yield. The metabolite(s) allows us to predict not only the yield, but also the traits that are highly correlated with the yield. For example, the effective number of flowers that eventually develop mature fruits is correlated with the yield. This suggests that the effective number of flowers newly generated within a period (e.g., a week) in the greenhouse, referred to as the &#x201c;anthesis rate&#x201d; in this study, is an effective index for the prediction of fruit production. In addition, this index has practical and diagnostic advantages for maximizing fruit production. When the predicted anthesis rate is too low for commercial fruit production, the environmental condition can be reconsidered to increase the rate. The improvement enhances the subsequent plant growth and increases the effective number of flowers, then maximizes tomato fruit production.</p>
<p>In this study, we present a statistical model with comprehensive metabolic profiles aimed at maximizing tomato fruit production in greenhouses, wherein the metabolic profiles in leaves were employed to predict the anthesis rate. Because metabolome data are high-dimensional multivariate data, variable selection is a crucial step to characterize the underlying patterns of these variables and narrow them down to find significant variables. Sparse modeling, including the least absolute shrinkage and selection operator (LASSO) model that we applied in this study, is widely used in various areas of data-driven science (<xref ref-type="bibr" rid="B27">Rasmussen and Bro, 2012</xref>; <xref ref-type="bibr" rid="B28">Rish and Grabarnik, 2014</xref>). The LASSO model has the ability to perform variable selection by reducing the number of variables. In the LASSO model, significantly contributing variables are weighted with large coefficients, while non-contributing variables are weighted with zero or near-zero coefficients. Consequently, we also identified metabolites that strongly contributed to the prediction of the anthesis rate. To date, the control of the environmental conditions in greenhouses has mainly relied on the experience and knowledge of experts in tomato fruit production. However, the use of machine learning and multivariate analysis with comprehensive metabolic profiles in vegetative organs allows us to not only predict fruit production, but also to adjust the environmental conditions for the enhancement of tomato growth without a need for abundant practical experience. This novel strategy will provide innovative knowledge and skills in greenhouse cultivation for all tomato growers, as well as facilitate the economically efficient production of other crops under greenhouse conditions.</p>
</sec>
<sec id="s2">
<title>2 Materials and Methods</title>
<sec id="s2-1">
<title>2.1 Plant Materials and Growth Conditions</title>
<p>Tomato plants were grown in greenhouses located in Tsukuba (36&#xb0;2&#x2032;4.88&#x2033; N, 140&#xb0;6&#x2032;2.9&#x2033; E) and Matsusaka (34&#xb0;37&#x2032;51.7&#x2033; N, 136&#xb0;29&#x2032;39.5&#x2033; E), Japan.</p>
<sec id="s2-1-1">
<title>2.1.1 Tsukuba Greenhouse (TK01)</title>
<p>In Tsukuba, in the experiment designated TK01, the seeds of the tomato cultivar Ringyoku (National Agricultural Research Organization, Tsukuba, Japan) and rootstock cultivar Maxifort (<italic>S. lycopersicum</italic> &#xd7; <italic>S. habrochaites</italic>; De Ruiter Seeds, Bergschenhoek, Netherlands) were sown on 16 May 2016. CF Momotaro York (CFMY) seeds (Takii Seed, Kyoto, Japan) were sown on 23 May 2016. On day 14 after sowing (DAS), Ringyoku scions were grafted onto Maxifort rootstocks. On DAS 28 (13 June 2016), all seedlings were transplanted into rockwool blocks (Delta4, Grodan, Roermond, Netherlands) and placed on rockwool slabs (Grotop expert, Grodan) in a greenhouse with a plant density of 3.3 plants/m<sup>2</sup>. Culture liquid with an electrical conductivity (EC) of 3.4&#xa0;mS/cm (15.8&#xa0;me/L nitrate, 4.5&#xa0;me/L P, 9.8&#xa0;me/L K, 9.3&#xa0;me/L Ca, 4.6&#xa0;me/L Mg, 0.07&#xa0;me/L Fe, 0.103&#xa0;me/L B, 0.017&#xa0;me/L Mn, 0.076&#xa0;me/L Zn, 0.00120&#xa0;me/L Cu, and 0.00083&#xa0;me/L Mo) was administered <italic>via</italic> a drip. From 14&#xa0;days after transplanting, culture liquid with an EC of 2.6&#xa0;mS/cm was administered. To control the cultivation environment, a ubiquitous environment control system (Fujitsu, Kawasaki, Japan) was used. The greenhouse was ventilated during the day and heated overnight so that the daily mean temperature was maintained at 25&#xb0;C. A heat pump (Green Package; Nepon, Tokyo, Japan) was operated from 20:00 to 04:00, with a target range of 16&#x2013;20&#xb0;C. The daytime relative humidity was controlled at 75% until 30&#xa0;days after transplanting, and maintained at 70% thereafter. Nineteen days after transplanting, CO<sub>2</sub> was added from 05:00 to 07:00 to reach a concentration of 800&#xa0;ppm. Then, and until 105&#xa0;days after transplanting (26 September 2016), CO<sub>2</sub> was added to a concentration of 400&#xa0;ppm all&#x20;day.</p>
</sec>
<sec id="s2-1-2">
<title>2.1.2 Matsusaka Greenhouse (IA04)</title>
<p>In Matsusaka, two sets of experiments (IA04 and IA06) were conducted. In the experiment designated IA04, the seeds of the tomato cultivars CFMY, C5-159 (Sakata Seed Co., Japan), C5-160 (Sakata Seed Co.), and C6-164 (Sakata Seed Co.) were sown on 27 July 2016. The seedlings grafted onto Maxifort rootstocks were transplanted on 1 September 2016. The plant density was set at 2.4 plants/m<sup>2</sup> and then rearranged to be 3.6 plants/m<sup>2</sup> in late January 2017. A rockwool culture system with drip fertigation was used in the greenhouse. The culture liquid was supplied with an EC of 3.0&#xa0;mS/cm (16&#xa0;me/L N, 4&#xa0;me/L P, 8.0&#xa0;me/L K, 8&#xa0;me/L Ca, and 4&#xa0;me/L Mg). The interior air temperature was controlled within the range of 13&#x2013;27&#xb0;C. The ideal humidity was 80%, and the CO<sub>2</sub> concentration was 800&#xa0;ppm normally without ventilation and 400&#xa0;ppm with ventilation during cloudy weather.</p>
</sec>
<sec id="s2-1-3">
<title>2.1.3 Matsusaka Greenhouse (IA06)</title>
<p>In another experiment, designated IA06, the seeds of the tomato cultivars CFMY, Ringyoku, and Managua (RIJK ZWAAN, Netherlands) were sown on 4 October 2016. The seedlings grafted onto Maxifort rootstocks were transplanted on 31 October 2016. The plant density was 2.4 plants/m<sup>2</sup> in the first 3&#xa0;months and then rearranged to 3.6 plants/m<sup>2</sup>. A rockwool culture system with drip fertigation was used in the greenhouse. The culture liquid was supplied with an EC of 3.0&#xa0;mS/cm (16&#xa0;me/L N, 4&#xa0;me/L P, 8.0&#xa0;me/L K, 8&#xa0;me/L Ca, and 4&#xa0;me/L Mg). The environmental conditions were controlled as in experiment&#x20;IA04.</p>
</sec>
</sec>
<sec id="s2-2">
<title>2.2 Measurement of Anthesis Rates</title>
<p>To measure the anthesis rates, we periodically counted the number of flowers that had not fallen off of each plant. The cumulative numbers of flowers (&#x201c;cumulative anthesis&#x201d;) were plotted (see <xref ref-type="sec" rid="s3">Section 3</xref> for details). From the cumulative anthesis plot, the anthesis rates were calculated from the gradients of a straight line between two neighboring time-points on the horizontal&#x20;axis.</p>
</sec>
<sec id="s2-3">
<title>2.3 Metabolome Analysis</title>
<sec id="s2-3-1">
<title>2.3.1 Sampling of Tomato Leaves</title>
<p>In Tsukuba (TK01), the most basal leaflet of a fully developed and sunlit leaf was sampled for two replications every 2&#xa0;h continuously for 24&#xa0;h at one-week intervals for 4&#xa0;weeks. A total of 192 leaf samples were collected from 16 August 2016 to 6 September 2016 (Ringyoku; <italic>n</italic>&#x20;&#x3d; 96, CFMY; <italic>n</italic>&#x20;&#x3d; 96). In Matsusaka, the fully developed upper leaves were sampled during 10:00&#x2013;14:00 on 13 October 2016, and 19 January 2017, for IA04 for three replications, except for C5-160 for two replications (CFMY; <italic>n</italic>&#x20;&#x3d; 6, C5-159; <italic>n</italic>&#x20;&#x3d; 6, C5-160; <italic>n</italic>&#x20;&#x3d; 4, C6-164; <italic>n</italic>&#x20;&#x3d; 6) and on 19 January 2017 (6 replications) and 9 March 2017 (8 replications) for IA06 (Ringyoku; <italic>n</italic>&#x20;&#x3d; 14, CFMY; <italic>n</italic>&#x20;&#x3d; 14, Managua; <italic>n</italic>&#x20;&#x3d; 14). The leaves were collected and flash-frozen in liquid nitrogen.</p>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Widely Targeted Metabolomic Analysis</title>
<p>The frozen leaf samples were freeze-dried and powdered. A small amount of samples (0.5&#x2013;8.9&#xa0;mg dry weight) was weighed and 1&#x20;ml/10&#xa0;mg (TK01) or 4&#xa0;mg (IA04 and IA06) dry weight of extraction solvent [80% (v/v) methanol and 0.1% (v/v) formic acid, with 8.4&#xa0;nmol/L lidocaine and 210&#xa0;nmol/L 10-camphorsulfonic acid as internal standards] was added. This mixture was shaken using a Shake Master Neo for 2&#xa0;min at 1,000&#xa0;rpm to extract the metabolites. After centrifugation for 1&#xa0;min at 9,100 &#xd7; g, the supernatant was diluted with the extraction solvent to obtain 0.4&#xa0;mg/ml extracts. Next, 25&#xa0;&#xb5;L of the extract was dried, dissolved in 250&#xa0;&#xb5;L of ultra-pure water, and filtered using Millipore MultiScreenHTS384 well (Merck KGaA, Darmstadt, Germany). A 1-&#xb5;L aliquot of this filtrate (0.04&#xa0;mg/ml) was subjected to widely targeted metabolomics using liquid chromatography coupled with a tandem quadrupole mass spectrometer (LC-QqQ-MS) (UPLC coupled with Xevo TQ-S, Waters, Milford, MA, United&#x20;States) (<xref ref-type="bibr" rid="B31">Sawada et&#x20;al., 2009</xref>; <xref ref-type="bibr" rid="B32">Sawada et&#x20;al., 2019</xref>). The analytical conditions are described in detail in <xref ref-type="sec" rid="s10">Supplementary Tables S1&#x2013;S3</xref>. The metabolome data were deposited in the DROP Met in PRIMe (the Platform for RIKEN Metabolomics) (DM0041, <ext-link ext-link-type="uri" xlink:href="http://prime.psc.riken.jp/archives/data/DropMet/059/">http://prime.psc.riken.jp/archives/data/DropMet/059/</ext-link>).</p>
</sec>
<sec id="s2-3-3">
<title>2.3.3 Measurement of Relative Metabolite Contents</title>
<p>For the Tsukuba data (TK01), the peak areas of 501 target metabolites (including two internal standards) were processed as follows. Values below the detection limit were set to zero. The peak area of each metabolite in a leaf sample was divided by the mean peak area in the extraction solvent control from the same leaf sample to obtain the signal-to-noise ratio. In total, 161 metabolites were detected with signal-to-noise ratios above two in more than half of the leaf samples (<xref ref-type="sec" rid="s10">Supplementary Table S3</xref>). The peak area of each metabolite was divided by that of the internal standard (lidocaine or 10-camphorsulfonic acid) to obtain the relative metabolite content.</p>
<p>The peak areas from the Matsusaka data (IA04 and IA06) were processed in the same manner as those from the Tsukuba data (TK01). After calculating the signal-to-noise ratio, the peak area of each metabolite was divided by that of the internal standard (lidocaine or 10-camphorsulfonic acid) to obtain the relative metabolite content.</p>
</sec>
</sec>
<sec id="s2-4">
<title>2.4 Least Absolute Shrinkage and Selection Operator Regularized Linear Regression Model Analysis</title>
<p>LASSO regularization was used to extract essential metabolites to predict an anthesis rate. We constructed a prediction model of the anthesis rate using LASSO regularized linear regression analysis, called the LASSO model, to identify the &#x201c;predictor metabolites&#x201d; for the anthesis&#x20;rate.</p>
<sec id="s2-4-1">
<title>2.4.1 Least Absolute Shrinkage and Selection Operator Model to Predict the Anthesis Rate in TK01</title>
<p>A LASSO model using metabolome data from TK01, named &#x201c;M-model&#x201d;, was constructed. Before training the model, the relative metabolite contents of each metabolite in all leaf samples were normalized to have a mean of zero and a standard deviation of one (that is, standardization). The LASSO model was implemented using sklearn.linear_model.Lasso in the Scikit-learn package (<xref ref-type="bibr" rid="B20">McKinney, 2010</xref>; <xref ref-type="bibr" rid="B24">Pedregosa et&#x20;al., 2011</xref>).</p>
<p>The M-model was constructed by training the metabolic profiles of 161 metabolites from 192 leaf samples. The linear regression is expressed as:<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <italic>y</italic><sub><italic>i</italic></sub> is the anthesis rate of the plant with the <italic>i</italic>th leaf sample (1 &#x2264; <italic>i</italic>&#x20;&#x2264; <italic>n</italic>, <italic>n</italic>&#x20;&#x3d; 192), <italic>X</italic><sub><italic>ij</italic></sub> is the relative metabolite content of the <italic>j</italic>th metabolite in the <italic>i</italic>th sample (1 &#x2264; <italic>j</italic>&#x20;&#x2264; <italic>m</italic>, <italic>m</italic>&#x20;&#x3d; 161), <italic>w</italic><sub><italic>j</italic></sub> is the model coefficient of the <italic>j</italic>th metabolite (1 &#x2264; <italic>j</italic>&#x20;&#x2264; <italic>m</italic>), and <italic>w</italic><sub>0</sub> is an intercept term. Here, <italic>y</italic><sub><italic>i</italic></sub> and <italic>X</italic><sub><italic>ij</italic></sub> are elements of a vector <italic>y</italic> &#x3d; (<italic>y</italic><sub>1</sub>, &#x2026; , <italic>y</italic><sub><italic>n</italic></sub>)<sup>T</sup> and an <italic>n</italic> &#xd7; <italic>m</italic> matrix <italic>X</italic>, respectively. The linear regression was trained with L1 regularization to perform both feature selection and regularization. The objective function to minimize is:<disp-formula id="e2">
<mml:math id="m2">
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mi>w</mml:mi>
</mml:munder>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mi>w</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msubsup>
<mml:mo>&#x7c;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf1">
<mml:math id="m3">
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mi>w</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msubsup>
<mml:mo>&#x7c;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; <inline-formula id="inf2">
<mml:math id="m4">
<mml:mrow>
<mml:msubsup>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mi>w</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the sum of the squared errors, <inline-formula id="inf3">
<mml:math id="m5">
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; <inline-formula id="inf4">
<mml:math id="m6">
<mml:mrow>
<mml:msubsup>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the L1-norm of the coefficient vector, and <inline-formula id="inf5">
<mml:math id="m7">
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <inline-formula id="inf6">
<mml:math id="m8">
<mml:mrow>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> is the penalty constant (<xref ref-type="bibr" rid="B36">Tibshirani, 1996</xref>). Thus, in the M-model, significantly contributing metabolites, called the selected metabolites, were weighted with large coefficients (either positive or negative), while non-contributing metabolites were weighted with zero coefficients. <italic>R</italic>
<sup>2</sup> value of the M-model was calculated. The prediction accuracy was assessed by 10-fold cross-validation. <italic>R</italic>
<sup>2</sup> value and the mean squared error (MSE) were used as accuracy metrics.</p>
<p>In addition, second and third LASSO models, trained with environmental data (E-model) and with combined metabolome and environmental data (C-model), respectively, were constructed in the same manner as the M-model. In the E-model, the <italic>X</italic> matrix contained only environmental factor data (solar irradiance, ambient temperature, relative humidity, and CO<sub>2</sub> concentration). The <italic>X</italic> matrix in the C-model consisted of the metabolic profiles of 161 metabolites and environmental factor&#x20;data.</p>
</sec>
<sec id="s2-4-2">
<title>2.4.2 Least Absolute Shrinkage and Selection Operator Model for the Assessment of the Prediction Accuracy of the Predictor Metabolites</title>
<p>We also used the LASSO model to assess the ability and strength of the predictor metabolites in the M-model by expanding the metabolome data from different experimental designs. The predictor metabolites selected from the M-model were used to reconstruct the LASSO model with additional leaf samples from IA04 and IA06. The model was reconstructed in the same manner as the M-model by training the metabolic profiles of the predictor metabolites of 256 leaf samples from three greenhouses (TK01, IA04, and IA06).</p>
</sec>
</sec>
<sec id="s2-5">
<title>2.5 Classification of Leaf Samples by Principal Component Analysis</title>
<p>The differences in leaf samples were evaluated by the PCA of their metabolic profiles. The relative metabolite content of each metabolite in all leaf samples was standardized. The PCA tool in the Scikit-learn package was used. The first two principal components of each leaf sample were used to project the leaf samples into a two-dimensional space. PCA was performed with two datasets, TK01 and a combined data of TK01, IA04, and IA06. For the PCA of TK01, the metabolic profiles of 161 metabolites from 192 leaf samples were used. For the PCA of data combined from TK01, IA04 and IA06, the metabolic profile of the predictor metabolites of 256 leaf samples from the three greenhouses (TK01, IA04, and IA06) were&#x20;used.</p>
</sec>
<sec id="s2-6">
<title>2.6 Hierarchical Clustering Analysis of the Predictor Metabolites</title>
<p>To evaluate the similarities among the predictor metabolites, the metabolic profiles of 256 leaf samples from the three greenhouses (TK01, IA04, and IA06) were used for HCL. The Pearson correlation coefficient (<italic>r</italic>) of the relative metabolite contents for each pair of metabolites was calculated (<xref ref-type="sec" rid="s10">Supplementary Figure S3</xref> and <xref ref-type="sec" rid="s10">Supplementary Table S4</xref>). Then, the distances between metabolites, namely, the &#x201c;correlation distance&#x201d; (1&#x2013;<italic>r</italic>), were employed for agglomerative clustering. Linkage methods were applied to compute the distances between sub-clusters; then, a dendrogram was generated to mine metabolites showing similar profiles. The optimum linkage method was determined based on the cophenetic correlation coefficient. The best linkage method, which yielded the maximum cophenetic correlation coefficient, was used to create a hierarchical dendrogram (<xref ref-type="bibr" rid="B14">Jones et&#x20;al., 2001</xref>). HCL was implemented using the Python library Scipy.</p>
</sec>
<sec id="s2-7">
<title>2.7 Network Analysis of the Predictor Metabolites With Correspondence Analysis</title>
<p>CA is a multivariate technique and is conceptually similar to PCA. In previous studies, CA has been used to clarify the associations between genes and experimental conditions in microarray analyses (<xref ref-type="bibr" rid="B41">Yano et&#x20;al., 2006</xref>; <xref ref-type="bibr" rid="B5">de Tayrac et&#x20;al., 2009</xref>). We employed CA for network analysis to discover the associations between the predictor metabolites and the associations between the predictor metabolites and the leaf sample characteristics, that is, experimental designs, cultivars, and sampling&#x20;times.</p>
<p>CA was executed against metabolic profiles. The metabolome data were arranged in a matrix where the columns and rows correspond to the predictor metabolites selected by the M-model and 256 leaf samples from the three experimental designs, respectively. The relative metabolite contents of each metabolite in all leaf samples were standardized, and the minimum value was subtracted to prevent negative values. CA was performed using the FactoMineR library in R (<xref ref-type="bibr" rid="B16">L&#xea; et&#x20;al., 2008</xref>). Coordinates with <italic>m-</italic>1 dimensions were assigned to each metabolite and leaf sample, where <italic>m</italic> is the number of predictor metabolites. The coordinate values of all dimensions were retrieved (<xref ref-type="sec" rid="s10">Supplementary Table&#x20;S5</xref>).</p>
<sec id="s2-7-1">
<title>2.7.1 Network Analysis Between the Predictor Metabolites and the Leaf Sample Characteristics</title>
<p>The Euclidean distances for each pair of a metabolite and leaf sample were calculated using coordinates in all dimensions from CA. Theoretically, a smaller Euclidean distance indicates a higher association. Based on the histograms of the Euclidean distance (<xref ref-type="sec" rid="s10">Supplementary Figure S4A</xref>), the 15th percentile of all distances was set as a threshold value to define a significant association. Pairs of a metabolite and leaf sample with distances less than the threshold were selected (<xref ref-type="sec" rid="s10">Supplementary Table S6</xref>). The means of the distances between each metabolite and each leaf sample characteristic were integrated to construct metabolic networks. Networks were constructed using py2cytoscape and NetworkX libraries in Python, and Cytoscape software (version 3.6.1) (<xref ref-type="bibr" rid="B34">Shannon et&#x20;al., 2003</xref>; <xref ref-type="bibr" rid="B10">Hagberg et&#x20;al., 2008</xref>; <xref ref-type="bibr" rid="B23">Ono et&#x20;al., 2015</xref>). The associations between the metabolites were also evaluated in the same manner.</p>
</sec>
<sec id="s2-7-2">
<title>2.7.2 Network Analysis Among the Predictor Metabolites</title>
<p>CA was used to determine the association among the predictor metabolites. The same process was performed to obtain pairwise Euclidean distances between the metabolites (<xref ref-type="sec" rid="s10">Supplementary Tables S7, S8</xref>). The distances that passed the threshold were integrated to construct the metabolite networks.</p>
</sec>
</sec>
<sec id="s2-8">
<title>2.8 Statistical Analysis for the Anthesis Rates</title>
<p>In TK01, the significance of the anthesis rates between the cultivars was analyzed using the Mann-Whitney U test. The significance of the anthesis rates among the experimental designs (TK01, IA04, and IA06) was analyzed using the Kruskal&#x2013;Wallis test with Conover&#x2019;s multiple comparison test. Scipy in Python was used for the statistical analyses.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Data Collection for Anthesis Rate, Leaf Metabolome, and Environmental Factors</title>
<p>In the experiment designated TK01, two tomato cultivars, Ringyoku and CFMY, were grown in Tsukuba, Japan. After transplanting the tomatoes into a greenhouse, the cumulative number of anthesis occurrences was recorded in parallel with leaflet sampling (<xref ref-type="fig" rid="F1">Figure&#x20;1A</xref>). The cumulative number of anthesis occurrences was used to calculate anthesis rates (<xref ref-type="fig" rid="F1">Figures 1B,C</xref>, respectively). The anthesis rates of the Ringyoku and CFMY cultivars were similar and gradually decreased over the growing period. No significant differences were observed between cultivars. During the growing period, fully developed basal and sunlit leaves were collected from plants. Leaf sampling every 2&#xa0;h for 24&#xa0;h was conducted four times at one-week intervals. The sampled leaves were subjected to a widely targeted metabolome analysis using a liquid chromatography-mass spectrometer. From a total of 499 targeted metabolites, 161 metabolites above the signal-to-noise ratio threshold were selected (<xref ref-type="sec" rid="s10">Supplementary Table S3</xref>). The relative metabolite contents of each metabolite in all leaf samples were standardized prior to further analysis. The boxplot (<xref ref-type="fig" rid="F1">Figure&#x20;1D</xref>) and PCA score plot (<xref ref-type="fig" rid="F1">Figure&#x20;1E</xref>) indicated that Ringyoku and CFMY had similar metabolic profiles. Thus, we pooled the metabolic profile data obtained from the two cultivars (192 leaf samples &#xd7; 161 metabolites) for further analysis. In addition, environmental data (solar irradiance, ambient temperature, relative humidity, and CO<sub>2</sub> concentration) were also obtained (<xref ref-type="fig" rid="F1">Figure&#x20;1F</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Experimental design of TK01. <bold>(A)</bold> Experimental timeline for leaf sampling and observation of anthesis of cultivars CFMY and Ringyoku. The blue rectangle indicates the period of the measurement of environmental factors (e.g., temperature). <bold>(B)</bold> Cumulative anthesis. The arrows and gray vertical lines indicate the dates of leaf sampling for metabolome analysis. <bold>(C)</bold> Distributions of anthesis rates were statistically the same between cultivars (Mann-Whitney U test, <italic>p</italic>&#x20;&#x3e; 0.05, CFMY; <italic>n</italic>&#x20;&#x3d; 21, Ringyoku; <italic>n</italic>&#x20;&#x3d; 21). <bold>(D)</bold> Box plot of the standardized relative metabolite contents of 161 metabolites in 192 leaf samples (CFMY; <italic>n</italic>&#x20;&#x3d; 96, Ringyoku; <italic>n</italic>&#x20;&#x3d; 96). <bold>(E)</bold> PCA score plot of the first two components (PC1 and PC2) of leaf samples (CFMY; <italic>n</italic>&#x20;&#x3d; 96, Ringyoku; <italic>n</italic>&#x20;&#x3d; 96). The metabolic profiles of the 161 metabolites were used for PCA. The numbers in parentheses in the axes are contribution ratios. <bold>(F)</bold> Environmental conditions measured in the experimental timeline. The environmental data in the blue background color were used for LASSO analysis (E-model and C-model). The period in the blue background color is consistent with the period for leaf sampling.</p>
</caption>
<graphic xlink:href="fmolb-09-839051-g001.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>3.2 Least Absolute Shrinkage and Selection Operator Model for Anthesis Rate Prediction in TK01</title>
<p>We constructed three models (M-model, E-model, and C-model) to predict the anthesis rates in TK01. The model was trained and optimized to obtain predictor metabolites.</p>
<p>For the construction of the M-model, the metabolic profiles of 161 metabolites in 192 leaf samples were employed. During model training, we optimized the model by assigning a range of the penalty constant (&#x3b1;) and then measuring the prediction accuracy by cross-validation. The penalty constant (&#x3b1;) of the M-model was fine-tuned to obtain the best prediction model with the selected metabolites. The iterative training was performed by varying &#x3b1; from 5&#x20;&#xd7; 10<sup>&#x2212;5</sup> to 0.5 (<xref ref-type="sec" rid="s10">Supplementary Figure S1A</xref>). At each given &#x3b1;, different sets of metabolites with optimized LASSO coefficients (<italic>w</italic>) were selected (<xref ref-type="sec" rid="s10">Supplementary Figure S1A</xref>). In each loop of a given &#x3b1;, the <italic>R</italic>
<sup>2</sup> value of the M-model was calculated, and the prediction accuracy of the M-model was assessed by 10-fold cross-validation. The <italic>R</italic>
<sup>2</sup> value and the mean squared error (MSE) of the 10-fold cross-validation were also calculated (<xref ref-type="sec" rid="s10">Supplementary Figure S1B</xref>). The <italic>R</italic>
<sup>2</sup> values of the training and cross-validation were used to determine an optimum M-model that contained the selected metabolites as the predictor metabolites for the anthesis rate (<xref ref-type="fig" rid="F2">Figure&#x20;2A</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>LASSO model with ten-fold cross-validation for the prediction of the anthesis rate in TK01. For LASSO regression analysis, the metabolic profiles of 161 metabolites in 192 leaf samples were employed. <bold>(A)</bold> The numbers of metabolites used for predictor variables versus <italic>R</italic>
<sup>2</sup> value. The elbow point suggests the optimum set of metabolites for the prediction model. <bold>(B)</bold> Comparison of anthesis rates between observed and predicted values. Predicted values were obtained from the M-model with 29 selected metabolites. The dotted line represents the agreement between the observed and predicted values. <bold>(C)</bold> Coefficients (<italic>w</italic>) of 29 metabolites selected by the M-model. Red dots are positive coefficients, while blue dots are negative coefficients.</p>
</caption>
<graphic xlink:href="fmolb-09-839051-g002.tif"/>
</fig>
<p>During model optimization, increasing the number of metabolites in the model increased the predictive accuracy (<italic>R</italic><sup>2</sup> values) in both training and cross-validation, up to the point where the cross-validation <italic>R</italic>
<sup>2</sup> stopped improving while the training <italic>R</italic>
<sup>2</sup> continued to increase, which indicates overfitting at a high number of metabolites. Thus, we selected the &#x3b1; at which the cross-validation <italic>R</italic>
<sup>2</sup> started to plateau and was closest to training <italic>R</italic>
<sup>2</sup> as our optimal model. In <xref ref-type="fig" rid="F2">Figure&#x20;2A</xref>, the optimum number of metabolites was determined to be 29 at the elbow point on the graph that yielded the closest <italic>R</italic>
<sup>2</sup> values between the training and cross-validation. Using the contributions of these 29 metabolites (<xref ref-type="fig" rid="F2">Figure&#x20;2C</xref>) as predictor metabolites, we constructed a prediction model for TK01 (M-model). The M-model provided good prediction performance for the anthesis rates (<xref ref-type="fig" rid="F2">Figure&#x20;2B</xref>). The <italic>R</italic>
<sup>2</sup> value of the M-model and the <italic>R</italic>
<sup>2</sup> value and MSE of the 10-fold cross-validation are summarized in <xref ref-type="table" rid="T1">Table&#x20;1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>The prediction accuracies of the three models in TK01.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Variable used for LASSO model construction</th>
<th rowspan="2" align="center">
<italic>R</italic>
<sup>2</sup> value (LASSO model)</th>
<th colspan="2" align="center">Cross-validation</th>
</tr>
<tr>
<th align="center">
<italic>R</italic>
<sup>2</sup> value</th>
<th align="center">MSE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">The M-model with metabolic profiles of 29 metabolites</td>
<td align="char" char=".">0.85</td>
<td align="char" char=".">0.75</td>
<td align="char" char=".">0.013</td>
</tr>
<tr>
<td align="left">The E-model (only environmental factors)</td>
<td align="char" char=".">0.11</td>
<td align="char" char=".">0.10</td>
<td align="char" char=".">0.055</td>
</tr>
<tr>
<td align="left">The C-model with metabolic profiles of 36 metabolites and environmental factors</td>
<td align="char" char=".">0.89</td>
<td align="char" char=".">0.83</td>
<td align="char" char=".">0.010</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To examine the possibility of including environmental factors in the prediction model, we also attempted to construct a LASSO model, the E-model, using four environmental parameters (interior air temperature, interior relative humidity, interior CO<sub>2</sub> concentration, and cumulative solar irradiance) recorded at 5-min intervals (<xref ref-type="fig" rid="F1">Figure&#x20;1F</xref>). The prediction performance of the environmental parameters was poor (<xref ref-type="table" rid="T1">Table&#x20;1</xref> and <xref ref-type="sec" rid="s10">Supplementary Figure S2A</xref>). Finally, the C-model was constructed using a combination of metabolites and environmental factors. The combination slightly improved the prediction accuracy of the anthesis rate (<xref ref-type="table" rid="T1">Table&#x20;1</xref> and <xref ref-type="sec" rid="s10">Supplementary Figure&#x20;S2B</xref>).</p>
</sec>
<sec id="s3-3">
<title>3.3 Assessment of the Accuracy of Anthesis Rate Prediction Using the Predictor Metabolites</title>
<p>To assess the prediction accuracy of the anthesis rates by the contents of the 29 selected metabolites as the predictor metabolites from the M-model, datasets from two greenhouses (IA04 and IA06) were&#x20;used.</p>
<sec id="s3-3-1">
<title>3.3.1 Differences in Metabolic Profiles Among Experimental Designs</title>
<p>In IA04 and IA06, the experimental designs were conducted at a different greenhouse location (Matsusaka) from TK01 (Tsukuba). In addition, these three experiments were performed in different growth seasons. Moreover, in addition to Ringyoku and CFMY, four additional cultivars were also used in IA04 and IA06 (<xref ref-type="sec" rid="s2-1">section 2.1</xref>). During the recording of the cumulative numbers of anthesis occurrences, the leaflets were sampled for metabolome analysis at one time point around noon on 2&#xa0;days (<xref ref-type="fig" rid="F3">Figure&#x20;3A</xref>). Therefore, the metabolic profiles were expected to vary because of the differences in the experimental designs. The relative metabolite contents of the 29 predictor metabolites in TK01, IA04, and IA06 are shown in a boxplot in <xref ref-type="fig" rid="F3">Figure&#x20;3B</xref>. The distribution of the relative metabolite contents in TK01 was relatively compact, while the IA04 and IA06 data exhibited relatively larger variances. This was caused by the mixed effects of different cultivars, greenhouse conditions, and seasons. In addition, PCA for the relative metabolite contents of the 29 predictor metabolites and all leaf samples (<italic>n</italic>&#x20;&#x3d; 256) from the three greenhouses was performed to investigate the differences among the experimental designs. The TK01 leaf samples were noticeably separable from the IA04 and IA06 leaf samples, while the IA04 and IA06 leaf samples were clustered together (<xref ref-type="fig" rid="F3">Figure&#x20;3C</xref>). In addition to the metabolic profiles, the anthesis rates differed among the three experimental designs (<xref ref-type="fig" rid="F3">Figure&#x20;3D</xref>). The anthesis rate in IA04 was slightly higher than that in TK01, while IA06 showed the highest anthesis rate among the three experimental designs.
The differences in the metabolic profiles and anthesis rate of TK01 and the two experimental designs (IA04 and IA06) made it difficult to obtain a good prediction by inputting data from IA04 and IA06 into the M-model.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Predictability assessment of the 29 predictor metabolites with expanding metabolome datasets. <bold>(A)</bold> Experimental timeline for leaf sampling and anthesis measurements in IA04 (cultivars: CFMY, C5-159, and C5-16) and IA06 (cultivars: CFMY, Ringyoku, and MNG). <bold>(B)</bold> Box plot of standardized relative metabolite contents of the 29 predictor metabolites in each cultivar in three experimental designs (TK01, IA04, and IA06). The numbers of leaf samples (<italic>n</italic>): CFMY (<italic>n</italic>&#x20;&#x3d; 96) and Ringyoku (<italic>n</italic>&#x20;&#x3d; 96) in TK01, CFMY (<italic>n</italic>&#x20;&#x3d; 6), C5-159 (<italic>n</italic>&#x20;&#x3d; 6), C6-164 (<italic>n</italic>&#x20;&#x3d; 6), and C5-160 (<italic>n</italic>&#x20;&#x3d; 4) in IA04, and Ringyoku (<italic>n</italic>&#x20;&#x3d; 14), CFMY (<italic>n</italic>&#x20;&#x3d; 14), and Managua (<italic>n</italic>&#x20;&#x3d; 14) in IA06. <bold>(C)</bold> PCA score plot of leaf samples (<italic>n</italic>&#x20;&#x3d; 256) by using metabolic profiles of the 29 predictor metabolites from three experimental designs (TK01, IA04 and IA06). The contribution ratio is shown in parentheses for the first and second principal component (axis). The colors indicate the experimental designs, and the markers represent the cultivars. <bold>(D)</bold> Anthesis rates used for the LASSO model (TK01, <italic>n</italic>&#x20;&#x3d; 16; IA04, <italic>n</italic>&#x20;&#x3d; 8; IA06, <italic>n</italic>&#x20;&#x3d; 6). Asterisks indicate significant differences according to the Kruskal-Wallis test with Conover&#x2019;s multiple comparison test (&#x2a;, <italic>p</italic>&#x20;&#x3c; 0.05; &#x2a;&#x2a;, <italic>p</italic>&#x20;&#x3c; 0.01; and &#x2a;&#x2a;&#x2a;, <italic>p</italic>&#x20;&#x3c; 0.001). <bold>(E)</bold> Model coefficients (<italic>w</italic>) of 13 metabolites selected in the LASSO model construction with metabolome datasets from three experimental designs (TK01, IA04 and IA06). 
The red dots are positive coefficients, while the blue dots are negative coefficients. <bold>(F)</bold> Comparison of anthesis rates between observed and predicted values obtained from the model constructed by the three datasets. The dotted line represents the agreement between the observed and predicted values.</p>
</caption>
<graphic xlink:href="fmolb-09-839051-g003.tif"/>
</fig>
</sec>
<sec id="s3-3-2">
<title>3.3.2 Least Absolute Shrinkage and Selection Operator Model to Assess the Prediction Accuracy of the Predictor Metabolites</title>
<p>We evaluated the predictive ability of 29 predictor metabolites selected from the M-model. If the predictor metabolites are biologically associated with the anthesis rate, a broader set of leaf samples from different experimental designs should also provide a good prediction model. To clarify whether a more universal model could be established, the relative metabolite content of the 29 predictor metabolites and the anthesis rates obtained in TK01, IA04, and IA06 were combined and subjected to the LASSO model. A total of 13 out of the 29 metabolites that yielded the minimum MSE were selected (<italic>R</italic>
<sup>2</sup> &#x3d; 0.75) (<xref ref-type="fig" rid="F3">Figure&#x20;3E</xref>). The 10-fold cross-validation results demonstrated the acceptable fitting and prediction accuracy of the model (MSE &#x3d; 0.26). The model showed good prediction performance across the three datasets (cross-validated <italic>R</italic>
<sup>2</sup> &#x3d; 0.69) (<xref ref-type="fig" rid="F3">Figure&#x20;3F</xref>). This result indicates that the predictor metabolites selected by the LASSO model as contributing variables in a specific dataset (TK01) could be effective for the prediction of the anthesis rate in general.</p>
<p>Among the two sets of metabolites selected from the M-model and this combined data model, the LASSO coefficients of the selected metabolites showed that tyramine, trigonelline, glycerophosphocholine, and L-threonic acid had a high association with the anthesis rate in both models.</p>
</sec>
</sec>
<sec id="s3-4">
<title>3.4 Candidate Metabolites Associated With the Anthesis Rate</title>
<p>Metabolites showing significant associations with anthesis rates are attractive candidates for markers of reproductive traits, including anthesis rates, fruit development, and production. We detected candidate metabolites related to anthesis rates by LASSO analysis (<xref ref-type="sec" rid="s3-3">Section 3.3</xref>). To understand the biological characteristics of the 29 predictor metabolites and identify candidate metabolites for future use as prediction markers, we investigated the association between the 29 selected metabolites and anthesis rates using hierarchical clustering analysis (HCL) and correspondence analysis&#x20;(CA).</p>
<p>First, HCL was used to visualize the metabolic profiles of TK01, IA04, and IA06. Pearson correlation coefficients (<italic>r</italic>) between each pair of the 29 predictor metabolites were obtained to evaluate the similarity in the profiles (<xref ref-type="sec" rid="s10">Supplementary Figure S3</xref>). Strong correlations were observed, particularly in the top selected metabolites, such as tyramine, trigonelline, glycerophosphocholine, and serotonin, of the M-model (<xref ref-type="fig" rid="F4">Figure&#x20;4A</xref>). This result suggests that each of these metabolites plays a similar and important role in anthesis rate estimation. It indicates that it is possible to choose only a small number of metabolites as key predictors of anthesis&#x20;rates.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Metabolite association of 29 predictor metabolites. <bold>(A)</bold> Dendrogram representing agglomerative clustering of the correlation distances of the 29 selected metabolites in average linkage. The cluster threshold was 0.5, as indicated by the black dotted line. Lines of the same color represent the same clusters. <bold>(B)</bold> Network for metabolites (threshold: &#x2264;15th percentile of Euclidean distances). The node size represents the number of edges linked to other metabolites.</p>
</caption>
<graphic xlink:href="fmolb-09-839051-g004.tif"/>
</fig>
<p>Next, CA was conducted for network analysis to elucidate the associations among the 29 predictor metabolites. In the metabolic network (<xref ref-type="fig" rid="F4">Figure&#x20;4B</xref>), all of the connected metabolites were amines, except for chlorogenic acid, rhoifolin, and L-threonic acid. Thus, the nitrogen-containing metabolites showed similar accumulation patterns across the leaf samples (<xref ref-type="fig" rid="F4">Figure&#x20;4B</xref>). Among all metabolite-to-metabolite edges, trigonelline has the most edges linked to other metabolites, indicating that trigonelline is a major coexisting metabolite with others.</p>
<p>CA was also conducted for network analysis to elucidate the associations between the 29 predictor metabolites and leaf sample characteristics, that is, experimental designs, cultivars, and sampling times. Among the leaf sample characteristics, the experimental design was the only factor displaying a clear separation in PCA (<xref ref-type="fig" rid="F3">Figure&#x20;3C</xref>), whereas the cultivars and sampling times did not show distinct separation (<xref ref-type="sec" rid="s10">Supplementary Figures S4B,C</xref>). Thus, in CA, we first examined the network between the predictor metabolites and the experimental designs (TK01, IA04, and IA06). In the network (<xref ref-type="fig" rid="F5">Figure&#x20;5A</xref>), IA04 and IA06 shared seven similarly dominant metabolites. Four out of the seven metabolites, glycerophosphocholine, serotonin, trigonelline, and tyramine, were in the top five of the 29 predictor metabolites (<xref ref-type="fig" rid="F2">Figure&#x20;2C</xref>). TK01 had nine highly associated metabolites. Among them, one metabolite, trigonelline, was linked to all three experimental designs in the network (<xref ref-type="fig" rid="F5">Figure&#x20;5A</xref>). Next, we examined the association between metabolites and cultivars. In the metabolite-to-cultivar network (<xref ref-type="fig" rid="F5">Figure&#x20;5B</xref>), a network pattern similar to that of the experimental designs was observed. The cultivars in IA04 and IA06 shared highly associated metabolites but did not share them with the cultivars in TK01, except for trigonelline, which was associated with all cultivars (<xref ref-type="fig" rid="F5">Figure&#x20;5B</xref>).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Metabolite association with experimental designs, cultivars, and sampling times. <bold>(A)</bold> Network of metabolites and growth conditions. <bold>(B)</bold> Network of metabolites and cultivars. The cultivars were divided into subcategories of experimental design; for example, the CFMY samples were divided into three and labeled CFMY_TK01, CFMY_IA04, and CFMY_IA06. <bold>(C)</bold> Network of metabolites and sampling times. <bold>(D)</bold> Diurnal changes of the relative content of phosphocholine (scaled between 0 and 1).</p>
</caption>
<graphic xlink:href="fmolb-09-839051-g005.tif"/>
</fig>
</sec>
<sec id="s3-5">
<title>3.5 Candidates of Stable Metabolites for the Prediction of the Anthesis Rate</title>
<p>Taking into account the leaf sampling time, metabolite content generally changes according to the circadian rhythm. For future use as key indicators of anthesis rate, metabolites whose contents do not change depending on the leaf sampling time are preferred. Because the leaf samples from TK01 were collected every 2&#xa0;h for a day in time-series format, we constructed a Euclidean distance network of TK01 samples to identify the metabolites associated with leaf sampling time, namely day (06:00&#x2012;18:00) or night (20:00&#x2012;04:00) (<xref ref-type="fig" rid="F5">Figure&#x20;5C</xref>). Among the nine metabolites strongly associated with TK01, phosphocholine was highly associated only at night. This result is consistent with the accumulation pattern of phosphocholine, which showed a diurnal bell-shaped pattern peaking at night (<xref ref-type="fig" rid="F5">Figure&#x20;5D</xref>). Eight other metabolites, including trigonelline, shared associations during both day and night, indicating high metabolite production that may remain stable throughout the&#x20;day.</p>
<p>To further evaluate the diurnal fluctuations of the 29&#x20;LASSO-selected metabolites in TK01, the relative contents of each metabolite were scaled between 0 and 1. The distribution of the standard deviations (SD) of the 29 metabolites is shown in <xref ref-type="fig" rid="F6">Figure&#x20;6A</xref>. The standard deviations of the metabolite contents ranged from 0.148 to 0.230. Among these, the standard deviation of trigonelline was relatively small (SD &#x3d; 0.167). In addition, the trigonelline content was relatively stable over the course of a day (<xref ref-type="fig" rid="F6">Figure&#x20;6B</xref>) compared to that of the other metabolites, such as phosphocholine, glycerophosphocholine, L-glutamic acid, and 4-aminobutyric acid, which exhibited strong diurnal fluctuations (<xref ref-type="sec" rid="s10">Supplementary Figure&#x20;S5</xref>).</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Diurnal fluctuations of metabolite content in tomato leaves. <bold>(A)</bold> Distribution of the standard deviations of 29 metabolites. The red arrow indicates the standard deviation of trigonelline at 0.167. <bold>(B)</bold> Diurnal fluctuations of the relative content of trigonelline (scaled between 0 and 1).</p>
</caption>
<graphic xlink:href="fmolb-09-839051-g006.tif"/>
</fig>
<p>Taken together, our results suggest that trigonelline is an attractive metabolite for use as a marker of the anthesis rate of tomatoes. Trigonelline was one of the top five LASSO-selected metabolites for the prediction of the anthesis rate (<xref ref-type="fig" rid="F2">Figures 2C</xref>, <xref ref-type="fig" rid="F3">3E</xref>), showed no diurnal changes, and exhibited stable content among the different cultivation conditions and varieties (<xref ref-type="fig" rid="F6">Figures 6A,B</xref>). Other metabolites among the top five, such as tyramine, were also available not only for the prediction of the anthesis rate, but also as markers under specific cultivation conditions.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Discussion</title>
<p>Machine learning approaches have the potential to provide prediction models for agricultural traits and effectively identify metabolites, genes, or environmental factors associated with these traits (<xref ref-type="bibr" rid="B21">Men&#xe9;ndez et&#x20;al., 2011</xref>; <xref ref-type="bibr" rid="B1">Acharjee, 2013</xref>; <xref ref-type="bibr" rid="B4">Das et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B7">Du et&#x20;al., 2019</xref>; <xref ref-type="bibr" rid="B32">Sawada et&#x20;al., 2019</xref>). Our study employed LASSO regularized linear regression model analysis to construct a prediction model of the anthesis rate using leaf metabolome data as predictor variables and identify the 29 predictor metabolites as candidate biomarkers. Importantly, we identified trigonelline as a key metabolite for anthesis rate prediction using the LASSO models and CA. Moreover, because the trigonelline content in the leaf was relatively stable over the course of a day, it was identified as an attractive biomarker of anthesis&#x20;rate.</p>
<sec id="s4-1">
<title>4.1 Possible Uses of Least Absolute Shrinkage and Selection Operator-Selected Metabolites as Biomarkers</title>
<p>The prediction of reproduction and fruit development in tomato is a powerful tool for the diagnosis of plants and the optimal management of the environmental conditions to maximize plant yields. Since anthesis is directly linked to tomato fruit production, it is a good index with which to evaluate tomato cultivation. Metabolites identified as being involved in anthesis can be employed as markers for the prediction of anthesis and&#x20;yield.</p>
<p>In the construction of the models using LASSO, unimportant metabolites were penalized by L1 regularization, leaving more prominent metabolites after variable selection. A reduction in the number of metabolites is desirable, because a smaller number of metabolites can be more easily measured for future use as biomarkers. As a result, 29 metabolites, including both primary and specialized (secondary) metabolites, were selected from among 161 metabolites. Most of the 29 selected metabolites were nitrogen-containing compounds, such as amino acids and their derivatives, alkaloids, amines, and phospholipids. The LASSO-selected metabolites could indicate the nitrogen status associated with the anthesis rate in tomatoes.</p>
<p>Among the 29 metabolites, trigonelline (<italic>N</italic>-methylnicotinate), a quaternary ammonium, exhibited a metabolic profile similar to that of the majority of the selected metabolites (<xref ref-type="fig" rid="F4">Figure&#x20;4B</xref>). In addition, trigonelline demonstrated the greatest association with all three growth conditions and all cultivars, while other metabolites were associated with only leaf samples from particular experiments (<xref ref-type="fig" rid="F5">Figures 5A&#x2013;C</xref>). Moreover, compared to other metabolites, trigonelline showed a relatively stable accumulation over the course of a day (<xref ref-type="fig" rid="F5">Figures 5D</xref>, <xref ref-type="fig" rid="F6">6B</xref> and <xref ref-type="sec" rid="s10">Supplementary Figure S5</xref>). Among the 29 metabolites associated with anthesis rate, trigonelline was shown to be a key metabolite related to anthesis rate. These results suggest that trigonelline is a suitable biomarker without diurnal fluctuations.</p>
<p>Trigonelline is known to increase in tomato leaves in response to increased nitrogen content in nutrient solutions (<xref ref-type="bibr" rid="B38">Tyih&#xe1;k et&#x20;al., 1988</xref>), and can thus serve as a possible indicator of nitrogen status within the plant body. Therefore, we investigated the correlation between trigonelline content in leaves and nitrogen fertilizer absorption in IA04 and IA06 (<xref ref-type="sec" rid="s10">Supplementary Table S9</xref>). The results showed a positive correlation (<italic>r</italic>&#x20;&#x3d; 0.56, <italic>p</italic>&#x20;&#x3c; 0.05) in IA06 and a weak correlation (<italic>r</italic>&#x20;&#x3d; 0.30, <italic>p</italic>&#x20;&#x3c; 0.05) in IA04, supporting this hypothesis. Trigonelline is synthesized from nicotinic acid, which is a metabolite of the nicotinamide adenine dinucleotide (NAD) synthesis/degradation (<xref ref-type="bibr" rid="B3">Ashihara, 2006</xref>). The functions of trigonelline in plants have been reported in terms of various aspects, such as cell cycle regulation, nodulation, and reduction of oxidative stress (<xref ref-type="bibr" rid="B22">Minorsky, 2002</xref>). A recent study reported on the function of trigonelline as a detoxified metabolite of excess nicotinic acid in the NAD cycle (<xref ref-type="bibr" rid="B17">Li et&#x20;al., 2017</xref>). The demethylation of trigonelline regenerated nicotinic acid for utilization in NAD synthesis as a reservoir metabolite. Demethylating activity has also been observed in the leaves of some plants, as well as in coffee plant seeds, during germination (<xref ref-type="bibr" rid="B3">Ashihara, 2006</xref>). In <italic>Arabidopsis thaliana</italic>, NAD is known to play an important role in growth phase transition (<xref ref-type="bibr" rid="B11">Hashida et&#x20;al., 2016</xref>). 
In a previous study, the perturbation of NAD redox homeostasis due to the overexpression of genes involved in NAD synthesis resulted in the ectopic generation of reactive oxygen species, leading to early flower stalk wilting and shortened plant longevity (<xref ref-type="bibr" rid="B11">Hashida et&#x20;al., 2016</xref>). In addition, NAD accumulation was reported in pollen before germination, indicating that NAD metabolism plays a crucial role in pollen maturation (<xref ref-type="bibr" rid="B12">Hashida et&#x20;al., 2013</xref>). Our hypothesis is that trigonelline may be involved in flower development via NAD homeostasis; however, further experiments are required to confirm this hypothesis.</p>
</sec>
<sec id="s4-2">
<title>4.2 Improving Predictability by Using Environmental Data</title>
<p>Although we attempted to use environmental factors to predict reproductive traits, the prediction performances of the generated models were poor (<xref ref-type="table" rid="T1">Table&#x20;1</xref> and <xref ref-type="sec" rid="s10">Supplementary Figure S2A</xref>). These results support our understanding that short-term environmental data are insufficient for yield prediction. Accumulated historic datasets of environmental factors may be required to achieve more accurate predictions (<xref ref-type="bibr" rid="B2">Adams, 2002</xref>; <xref ref-type="bibr" rid="B26">Qaddoum et&#x20;al., 2013</xref>; <xref ref-type="bibr" rid="B29">Saito et&#x20;al., 2020</xref>). On the other hand, the combination of metabolome and environmental factor data resulted in improved prediction performance (<xref ref-type="table" rid="T1">Table&#x20;1</xref>). Considering a plant as an autotrophic production system, it is reasonable that a combination of environmental factors (system inputs) and metabolic status (a system internal condition) can produce more accurate production estimates (system outputs) than either one individually. Thus, monitoring both types of factors in greenhouse system management is likely to yield the best prediction performance.</p>
</sec>
<sec id="s4-3">
<title>4.3 Machine Learning Algorithms for Metabolome Data</title>
<p>Among the machine learning approaches, LASSO linear regression analysis was chosen for the following reasons. First, linear regression is often used to estimate biological rates (<xref ref-type="bibr" rid="B33">Schneider et&#x20;al., 2010</xref>). Thus, linear regression seems to be an appropriate choice for our experiments. Second, our dataset contained more variables than samples, which could lead to severe overfitting in a more complex model (<xref ref-type="bibr" rid="B37">Trunk, 1979</xref>). A simpler model, such as a linear regression model combined with LASSO regularization, is preferred; therefore, the LASSO linear regression method was employed in this study. In fact, we have previously tested several other regression algorithms, including ridge regression, random forest regressor, k-nearest neighbor regression, and support vector regression (<xref ref-type="bibr" rid="B24">Pedregosa et&#x20;al., 2011</xref>; <xref ref-type="bibr" rid="B39">VanderPlas, 2016</xref>), all of which performed worse than or the same as the LASSO model with our dataset (data not shown). A detailed comparison of these algorithms will be described elsewhere. Based on this knowledge, LASSO was chosen for this&#x20;study.</p>
</sec>
</sec>
</body>
<back>
<sec id="s5">
<title>Data Availability Statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="http://prime.psc.riken.jp/archives/data/DropMet/059/">http://prime.psc.riken.jp/archives/data/DropMet/059/</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author Contributions</title>
<p>RS, JM, KY, and MYH conceived and designed the study. RS and JM conducted the study, analyzed, interpreted the data, and wrote the paper. JM, TS, HN, MI, YI, MS, and TH contributed to the acquisition of data. KY and MYH supervised the research. MA, SA, TH, KY, and MYH edited and reviewed the article. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work was supported by the Cabinet Office, Government of Japan, Cross-ministerial Strategic Innovation Promotion Program (SIP), &#x201c;Technologies for creating next-generation agriculture, forestry and fisheries&#x201d; (funding agency: Bio-oriented Technology Research Advancement Institution, NARO), and MEXT KAKENHI Grant Numbers 19H04870 to KY and 18H04808 and 20H04852 to MYH. This work was also supported in part by the Research Funding for the Computational Software Supporting Program of Meiji University. Computations were partially performed on the NIG supercomputer at the ROIS National Institute of Genetics.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>We thank Y. Yamada for assistance with the data repository on DROP&#x20;Met.</p>
</ack>
<sec id="s10">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmolb.2022.839051/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmolb.2022.839051/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Presentation1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet1.xlsx" id="SM2" mimetype="application/xlsx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Acharjee</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Comparison of Regularized Regression Methods for &#x223c;Omics Data</article-title>. <source>Metabolomics</source> <volume>03</volume> (<issue>3</issue>), <fpage>126</fpage>. <pub-id pub-id-type="doi">10.4172/2153-0769.1000126</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Adams</surname>
<given-names>S. R.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Predicting the Weekly Fluctuations in Glasshouse Tomato Yields</article-title>. <source>Acta Hortic.</source> <volume>593</volume>, <fpage>19</fpage>&#x2013;<lpage>23</lpage>. <pub-id pub-id-type="doi">10.17660/ActaHortic.2002.593.1</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashihara</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Metabolism of Alkaloids in Coffee Plants</article-title>. <source>Braz. J.&#x20;Plant Physiol.</source> <volume>18</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1590/s1677-04202006000100001</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Das</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Nair</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Reddy</surname>
<given-names>V. K.</given-names>
</name>
<name>
<surname>Venkatesh</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Evaluation of Multiple Linear, Neural Network and Penalised Regression Models for Prediction of rice Yield Based on Weather Parameters for West Coast of India</article-title>. <source>Int. J.&#x20;Biometeorol.</source> <volume>62</volume> (<issue>10</issue>), <fpage>1809</fpage>&#x2013;<lpage>1822</lpage>. <pub-id pub-id-type="doi">10.1007/s00484-018-1583-6</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Tayrac</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>L&#xea;</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Aubry</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mosser</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Husson</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Simultaneous Analysis of Distinct Omics Data Sets with Integration of Biological Knowledge: Multiple Factor Analysis Approach</article-title>. <source>BMC Genomics</source> <volume>10</volume>, <fpage>32</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2164-10-32</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dinar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rudich</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1985</year>). <article-title>Effect of Heat Stress on Assimilate Metabolism in Tomato Flower Buds</article-title>. <source>Ann. Bot.</source> <volume>56</volume> (<issue>2</issue>), <fpage>249</fpage>&#x2013;<lpage>257</lpage>. <pub-id pub-id-type="doi">10.1093/oxfordjournals.aob.a087009</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Campbell</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Walia</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Network&#x2010;based Feature Selection Reveals Substructures of Gene Modules Responding to Salt Stress in rice</article-title>. <source>Plant Direct</source> <volume>3</volume> (<issue>8</issue>), <fpage>e00154</fpage>. <pub-id pub-id-type="doi">10.1002/pld3.154</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="book">
<collab>FAOSTAT</collab> (<year>2018</year>). <source>Food and Agriculture Organization of the United Nations</source>. <publisher-loc>Rome, Italy</publisher-loc>: <publisher-name>FAOSTAT Database</publisher-name>. </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Teng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Genomic Prediction of Complex Phenotypes Using Genic Similarity Based Relatedness Matrix</article-title>. <source>Front. Genet.</source> <volume>9</volume> (<issue>364</issue>), <fpage>364</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2018.00364</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hagberg</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Schult</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Swart</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Exploring Network Structure, Dynamics, and Function using NetworkX</article-title>,&#x201d; in <source>Proceedings of the 7th Python in Science Conference</source>, <conf-date>August 19&#x2013;24, 2008</conf-date>. Editors <person-group person-group-type="editor">
<name>
<surname>Varoquaux</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Vaught</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Millman</surname>
<given-names>J.</given-names>
</name>
</person-group> (<publisher-loc>Pasadena, CA USA</publisher-loc>), <fpage>11</fpage>&#x2013;<lpage>15</lpage>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://conference.scipy.org/proceedings/SciPy2008/paper_2/">http://conference.scipy.org/proceedings/SciPy2008/paper_2/</ext-link>
</comment>. </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hashida</surname>
<given-names>S.-n.</given-names>
</name>
<name>
<surname>Itami</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Takahara</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Hirabayashi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Uchimiya</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kawai-Yamada</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Increased Rate of NAD Metabolism Shortens Plant Longevity by Accelerating Developmental Senescence in Arabidopsis</article-title>. <source>Plant Cell Physiol.</source> <volume>57</volume> (<issue>11</issue>), <fpage>2427</fpage>&#x2013;<lpage>2439</lpage>. <pub-id pub-id-type="doi">10.1093/pcp/pcw155</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hashida</surname>
<given-names>S.-n.</given-names>
</name>
<name>
<surname>Takahashi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Takahara</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kawai-Yamada</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kitazaki</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Shoji</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>NAD&#x2b; Accumulation during Pollen Maturation in Arabidopsis Regulating Onset of Germination</article-title>. <source>Mol. Plant</source> <volume>6</volume> (<issue>1</issue>), <fpage>216</fpage>&#x2013;<lpage>225</lpage>. <pub-id pub-id-type="doi">10.1093/mp/sss071</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heuvelink</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Buiskool</surname>
<given-names>R. P. M.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Influence of Sink-Source Interaction on Dry Matter Production in Tomato</article-title>. <source>Ann. Bot.</source> <volume>75</volume> (<issue>4</issue>), <fpage>381</fpage>&#x2013;<lpage>389</lpage>. <pub-id pub-id-type="doi">10.1006/anbo.1995.1036</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jones</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Oliphant</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Peterson</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2001</year>). <source>SciPy: Open Source Scientific Tools for Python</source>. </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sagar</surname>
<given-names>G. R.</given-names>
</name>
</person-group> (<year>1969</year>). <article-title>Alteration of the Pattern of Distribution of Photosynthetic Products in the Tomato by Manipulation of the Plant</article-title>. <source>Ann. Bot.</source> <volume>33</volume> (<issue>4</issue>), <fpage>753</fpage>&#x2013;<lpage>762</lpage>. <pub-id pub-id-type="doi">10.1093/oxfordjournals.aob.a084322</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>L&#xea;</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Josse</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Husson</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>FactoMineR: An R Package for Multivariate Analysis</article-title>. <source>J.&#x20;Stat. Soft.</source> <volume>25</volume> (<issue>1</issue>), <fpage>18</fpage>. <pub-id pub-id-type="doi">10.18637/jss.v025.i01</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>A Novel N-Methyltransferase in Arabidopsis Appears to Feed a Conserved Pathway for Nicotinate Detoxification Among Land Plants and Is Associated with Lignin Biosynthesis</article-title>. <source>Plant Physiol.</source> <volume>174</volume> (<issue>3</issue>), <fpage>1492</fpage>&#x2013;<lpage>1504</lpage>. <pub-id pub-id-type="doi">10.1104/pp.17.00259</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liabeuf</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sim</surname>
<given-names>S.-C.</given-names>
</name>
<name>
<surname>Francis</surname>
<given-names>D. M.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Comparison of Marker-Based Genomic Estimated Breeding Values and Phenotypic Evaluation for Selection of Bacterial Spot Resistance in Tomato</article-title>. <source>Phytopathology</source> <volume>108</volume> (<issue>3</issue>), <fpage>392</fpage>&#x2013;<lpage>401</lpage>. <pub-id pub-id-type="doi">10.1094/PHYTO-12-16-0431-R</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liebisch</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Max</surname>
<given-names>J.&#x20;F. J.</given-names>
</name>
<name>
<surname>Heine</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Horst</surname>
<given-names>W. J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Blossom&#x2010;end Rot and Fruit Cracking of Tomato Grown in Net&#x2010;covered Greenhouses in Central Thailand Can Partly Be Corrected by Calcium and boron Sprays</article-title>. <source>Z. Pflanzenern&#xe4;hr. Bodenk.</source> <volume>172</volume> (<issue>1</issue>), <fpage>140</fpage>&#x2013;<lpage>150</lpage>. <pub-id pub-id-type="doi">10.1002/jpln.200800180</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>McKinney</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Data Structures for Statistical Computing in Python</article-title>,&#x201d; in <source>Proceedings of the 9th Python in Science Conference</source>, <conf-date>June 28&#x2013;July 3, 2010</conf-date>. Editors <person-group person-group-type="editor">
<name>
<surname>van der Walt</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Millman</surname>
<given-names>J.</given-names>
</name>
</person-group> (<publisher-loc>Austin, TX</publisher-loc>) <volume>445</volume>, <fpage>51</fpage>&#x2013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.25080/Majora-92bf1922-00a</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Men&#xe9;ndez</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Eilers</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Tikunov</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bovy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>van Eeuwijk</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Penalized Regression Techniques for Modeling Relationships between Metabolites and Tomato Taste Attributes</article-title>. <source>Euphytica</source> <volume>183</volume> (<issue>3</issue>), <fpage>379</fpage>&#x2013;<lpage>387</lpage>. <pub-id pub-id-type="doi">10.1007/s10681-011-0374-5</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Minorsky</surname>
<given-names>P. V.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Trigonelline: A Diverse Regulator in Plants</article-title>. <source>Plant Physiol.</source> <volume>128</volume> (<issue>1</issue>), <fpage>7</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1104/pp.900014</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ono</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Muetze</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kolishovski</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Shannon</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Demchak</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>CyREST: Turbocharging Cytoscape Access for External Tools via a RESTful API</article-title>. <source>F1000Res</source> <volume>4</volume>, <fpage>478</fpage>. <pub-id pub-id-type="doi">10.12688/f1000research.6767.1</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedregosa</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Varoquaux</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gramfort</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Michel</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Thirion</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Grisel</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Scikit-learn: Machine Learning in Python</article-title>. <source>J.&#x20;Mach Learn. Res.</source> <volume>12</volume>, <fpage>2825</fpage>&#x2013;<lpage>2830</lpage>. <pub-id pub-id-type="doi">10.1145/2786984.2786995</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Peet</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Welles</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2005</year>). &#x201c;<article-title>Greenhouse Tomato Production</article-title>,&#x201d; in <source>Tomatoes</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Heuvelink</surname>
<given-names>E.</given-names>
</name>
</person-group> (<publisher-loc>Wallingford, UK</publisher-loc>: <publisher-name>CABI Publishing</publisher-name>), <fpage>257</fpage>&#x2013;<lpage>304</lpage>. <pub-id pub-id-type="doi">10.1079/9780851993966.0257</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qaddoum</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Hines</surname>
<given-names>E. L.</given-names>
</name>
<name>
<surname>Iliescu</surname>
<given-names>D. D.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Yield Prediction for Tomato Greenhouse Using EFuNN</article-title>. <source>ISRN Artif. Intelligence</source> <volume>2013</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1155/2013/430986</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rasmussen</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Bro</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>A Tutorial on the Lasso Approach to Sparse Modeling</article-title>. <source>Chemometrics Intell. Lab. Syst.</source> <volume>119</volume>, <fpage>21</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1016/j.chemolab.2012.10.003</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rish</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Grabarnik</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2014</year>). <source>Sparse Modeling: Theory, Algorithms, and Applications</source>. <publisher-loc>Boca Raton</publisher-loc>: <publisher-name>CRC Press</publisher-name>. </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saito</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kawasaki</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ahn</surname>
<given-names>D.-H.</given-names>
</name>
<name>
<surname>Ohyama</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Higashide</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Prediction and Improvement of Yield and Dry Matter Production Based on Modeling and Non-destructive Measurement in Year-Round Greenhouse Tomatoes</article-title>. <source>Hortic. J.</source> <volume>89</volume> (<issue>4</issue>), <fpage>425</fpage>&#x2013;<lpage>431</lpage>. <pub-id pub-id-type="doi">10.2503/hortj.UTD-170</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saure</surname>
<given-names>M. C.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Why Calcium Deficiency Is Not the Cause of Blossom-End Rot in Tomato and Pepper Fruit - a Reappraisal</article-title>. <source>Scientia Horticulturae</source> <volume>174</volume>, <fpage>151</fpage>&#x2013;<lpage>154</lpage>. <pub-id pub-id-type="doi">10.1016/j.scienta.2014.05.020</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sawada</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Akiyama</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Sakata</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kuwahara</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Otsuki</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sakurai</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Widely Targeted Metabolomics Based on Large-Scale MS/MS Data for Elucidating Metabolite Accumulation Patterns in Plants</article-title>. <source>Plant Cell Physiol.</source> <volume>50</volume> (<issue>1</issue>), <fpage>37</fpage>&#x2013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1093/pcp/pcn183</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sawada</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sato</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Okamoto</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Masuda</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yamaki</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tamari</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Metabolome-based Discrimination of Chrysanthemum Cultivars for the Efficient Generation of Flower Color Variations in Mutation Breeding</article-title>. <source>Metabolomics</source> <volume>15</volume> (<issue>9</issue>), <fpage>118</fpage>. <pub-id pub-id-type="doi">10.1007/s11306-019-1573-7</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schneider</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hommel</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Blettner</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Linear Regression Analysis: Part 14 of a Series on Evaluation of Scientific Publications</article-title>. <source>Dtsch Arztebl Int.</source> <volume>107</volume> (<issue>44</issue>), <fpage>776</fpage>&#x2013;<lpage>782</lpage>. <pub-id pub-id-type="doi">10.3238/arztebl.2010.0776</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shannon</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Markiel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ozier</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Baliga</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.&#x20;T.</given-names>
</name>
<name>
<surname>Ramage</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2003</year>). <article-title>Cytoscape: a Software Environment for Integrated Models of Biomolecular Interaction Networks</article-title>. <source>Genome Res.</source> <volume>13</volume> (<issue>11</issue>), <fpage>2498</fpage>&#x2013;<lpage>2504</lpage>. <pub-id pub-id-type="doi">10.1101/gr.1239303</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tanaka</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fujita</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>1974</year>). <article-title>Nutrio-physiological Studies on the Tomato Plant IV. Source-Sink Relationship and Structure of the Source-Sink Unit</article-title>. <source>Soil Sci. Plant Nutr.</source> <volume>20</volume> (<issue>3</issue>), <fpage>305</fpage>&#x2013;<lpage>315</lpage>. <pub-id pub-id-type="doi">10.1080/00380768.1974.10433252</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tibshirani</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>Regression Shrinkage and Selection via the Lasso</article-title>. <source>J.&#x20;R. Stat. Soc. Ser. B (Methodological)</source> <volume>58</volume> (<issue>1</issue>), <fpage>267</fpage>&#x2013;<lpage>288</lpage>. <pub-id pub-id-type="doi">10.1111/j.2517-6161.1996.tb02080.x</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Trunk</surname>
<given-names>G. V.</given-names>
</name>
</person-group> (<year>1979</year>). <article-title>A Problem of Dimensionality: a Simple Example</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>1</volume> (<issue>3</issue>), <fpage>306</fpage>&#x2013;<lpage>307</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.1979.4766926</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tyih&#xe1;k</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Sarhan</surname>
<given-names>A. R. T.</given-names>
</name>
<name>
<surname>Cong</surname>
<given-names>N. T.</given-names>
</name>
<name>
<surname>Barna</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kir&#xe1;ly</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>1988</year>). <article-title>The Level of Trigonelline and Other Quaternary Ammonium Compounds in Tomato Leaves in Ratio to the Changing Nitrogen Supply</article-title>. <source>Plant Soil</source> <volume>109</volume> (<issue>2</issue>), <fpage>285</fpage>&#x2013;<lpage>287</lpage>. <pub-id pub-id-type="doi">10.1007/bf02202097</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>VanderPlas</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <source>Python Data Science Handbook: Essential Tools for Working with Data</source>. <publisher-name>O&#x27;Reilly Media</publisher-name>. </citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yamamoto</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Matsunaga</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Onogi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kajiya-Kanegae</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Minamikawa</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>A Simulation-Based Breeding Design that Uses Whole-Genome Prediction in Tomato</article-title>. <source>Sci. Rep.</source> <volume>6</volume>, <fpage>19454</fpage>. <pub-id pub-id-type="doi">10.1038/srep19454</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yano</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Imai</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Shimizu</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hanashita</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>A New Method for Gene Discovery in Large-Scale Microarray Data</article-title>. <source>Nucleic Acids Res.</source> <volume>34</volume> (<issue>5</issue>), <fpage>1532</fpage>&#x2013;<lpage>1539</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkl058</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>