<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2024.1392409</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Tackling unbalanced datasets for yellow and brown rust detection in wheat</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Cuenca-Romero</surname><given-names>Carmen</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2668940"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Apolo-Apolo</surname><given-names>Orly Enrique</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/884712"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Rodr&#xed;guez V&#xe1;zquez</surname><given-names>Jaime Nolasco</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Egea</surname><given-names>Gregorio</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/499698"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>P&#xe9;rez-Ruiz</surname><given-names>Manuel</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/922654"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Universidad de Sevilla, &#xc1;rea de Ingenier&#xed;a Agroforestal, Dpto. de Ingenier&#xed;a Aeroespacial y Mec&#xe1;nica de Fluidos</institution>, <addr-line>Seville</addr-line>, <country>Spain</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Earth and Environmental Sciences, KU Leuven</institution>, <addr-line>Leuven</addr-line>, <country>Belgium</country></aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Shawn Carlisle Kefauver, University of Barcelona, Spain</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Ebenezer Olaniyi, Mississippi State University, United States</p>
<p>Haiguang Wang, China Agricultural University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Manuel P&#xe9;rez-Ruiz, <email xlink:href="mailto:manuelperez@us.es">manuelperez@us.es</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>05</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1392409</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>02</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>25</day>
<month>04</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Cuenca-Romero, Apolo-Apolo, Rodr&#xed;guez V&#xe1;zquez, Egea and P&#xe9;rez-Ruiz</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Cuenca-Romero, Apolo-Apolo, Rodr&#xed;guez V&#xe1;zquez, Egea and P&#xe9;rez-Ruiz</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>This study evaluates the efficacy of hyperspectral data for detecting yellow and brown rust in wheat, employing machine learning models and the SMOTE (Synthetic Minority Oversampling Technique) augmentation technique to tackle unbalanced datasets. Artificial Neural Network (ANN), Support Vector Machine (SVM), Random Forest (RF), and Gaussian Na&#xef;ve Bayes (GNB) models were assessed. Overall, SVM and RF models showed higher accuracies, particularly when utilizing SMOTE-enhanced datasets. The RF model achieved 70% accuracy in detecting yellow rust without data alteration. Conversely, for brown rust, the SVM model outperformed others, reaching 63% accuracy with SMOTE applied to the training set. This study highlights the potential of spectral data and machine learning (ML) techniques in plant disease detection. It emphasizes the need for further research in data processing methodologies, particularly in exploring the impact of techniques like SMOTE on model performance.</p>
</abstract>
<kwd-group>
<kwd>wheat</kwd>
<kwd>rust</kwd>
<kwd>SMOTE</kwd>
<kwd>unbalanced datasets</kwd>
<kwd>machine learning</kwd>
</kwd-group>
<counts>
<fig-count count="5"/>
<table-count count="5"/>
<equation-count count="5"/>
<ref-count count="38"/>
<page-count count="11"/>
<word-count count="6006"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Technical Advances in Plant Science</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>In wheat crops, foliar diseases such as rust are directly related to decreased yield and grain quality (<xref ref-type="bibr" rid="B11">Figueroa et&#xa0;al., 2018</xref>). Yield losses caused by diseases depend on the crop cultivar&#x2019;s resistance or susceptibility and the specific type of rust affecting the crop. Potential losses may reach up to 5% in resistant varieties, but in highly susceptible cultivars, this value can be 80% under favorable conditions for the disease (<xref ref-type="bibr" rid="B3">Beard et&#xa0;al., 2005</xref>). To reduce the effects of the disease on yield, farmers often make preventive applications when the first pustules are seen. However, treatments are usually not effective since the damage caused by the rust has already occurred at the cellular level (<xref ref-type="bibr" rid="B2">Bauriegel and Herppich, 2014</xref>). Consequently, early disease detection is essential to optimize disease management and maximize crop production (<xref ref-type="bibr" rid="B27">Salvagiotti et&#xa0;al., 2005</xref>; <xref ref-type="bibr" rid="B21">Orchi et&#xa0;al., 2021</xref>).</p>
<p>As mentioned, crop disease identification primarily relies on human visual inspection (<xref ref-type="bibr" rid="B36">Yadav et&#xa0;al., 2019</xref>). However, this method is subjective, time-consuming, and prone to human error (<xref ref-type="bibr" rid="B4">Bock et&#xa0;al., 2010</xref>). As an alternative to visual methods, many technologies based on remote sensing have been developed to achieve more accurate, rapid, and cost-effective detection of crop diseases (<xref ref-type="bibr" rid="B38">Zhang et&#xa0;al., 2019</xref>). These technologies offer great potential for early and non-destructive detection of plant diseases, enabling timely intervention (<xref ref-type="bibr" rid="B32">Terentev et&#xa0;al., 2022</xref>).</p>
<p>From a remote-sensing perspective, disease detection uses various tools (<xref ref-type="bibr" rid="B37">Yang, 2020</xref>). However, in the past years, spectral information has gained significance, as highlighted by <xref ref-type="bibr" rid="B34">Wan et&#xa0;al. (2022)</xref>. The reliance on spectral information is based on the understanding that each disease induces unique spectral reflectivity patterns in crops, resulting from the harm inflicted on plant tissues (<xref ref-type="bibr" rid="B6">Clevers, 1999</xref>). The changes can be detected by hyperspectral sensors (spectroradiometers and cameras), which are considered state-of-the-art for disease detection in crops (<xref ref-type="bibr" rid="B16">Khanal et&#xa0;al., 2020</xref>). These sensors offer an exceptional level of spectral resolution, capturing data related to biotic and abiotic stresses that might not be easily detected by other sensors with lower spectral resolution (<xref ref-type="bibr" rid="B35">Weiss et&#xa0;al., 2020</xref>). Given this scenario, hyperspectral cameras have emerged as a promising alternative to spectroradiometers among the hyperspectral sensors. They provide the unique capability to capture a high-resolution spectrum for each pixel in an image. Despite their advantages, hyperspectral images have several limitations, independent of the equipment&#x2019;s cost, that should be considered. According to the work conducted by <xref ref-type="bibr" rid="B25">Roberts et&#xa0;al. (2018)</xref>, the issues are related to the availability of robust commercial instrumentation and the large amount of data generated during the analysis. Due to the large amount of data generated, hyperspectral images require extensive processing work, which involves a significant amount of time and complex algorithms to reduce spectral dimensionality (<xref ref-type="bibr" rid="B22">Paoletti et&#xa0;al., 2019</xref>). 
In this regard, spectroradiometers offer less detailed information because they do not produce an image as an outcome. However, they are a more affordable solution in terms of equipment cost and data processing.</p>
<p>Many approaches have been derived for data processing of spectroradiometers and hyperspectral cameras. One of the most widely used approaches is the application of vegetation spectral indices obtained from the combination of specific spectral bands (<xref ref-type="bibr" rid="B12">Giovos et&#xa0;al., 2021</xref>). These indices may detect crop diseases by observing changes in the leaf&#x2019;s external (i.e., necrosis and chlorosis) and internal architecture (i.e., chloroplast dysfunction), as explained by <xref ref-type="bibr" rid="B18">Lin et&#xa0;al. (2017)</xref>. Extensive research has been conducted to detect diseases using spectral indices. As an example, <xref ref-type="bibr" rid="B8">Devadas et&#xa0;al. (2009)</xref> showed the suitability of specific indices like the Anthocyanin Reflectance Index (ARI) to discriminate between healthy and rust-infected wheat leaves at a medium-late growth stage and the Transformed Chlorophyll Absorption in Reflectance Index (TCARI) to detect wheat leaf rust. Other studies, such as the one conducted by <xref ref-type="bibr" rid="B1">Ashourloo et&#xa0;al. (2014)</xref>, demonstrated remarkable accuracies exceeding 85% in estimating disease severity using a Leaf Rust Disease Severity Index (LRDSI). While the LRDSI has been successful, it has limitations in the early detection of symptoms due to the spectral similarity between affected and healthy leaf areas. Spectral indices offer valuable insights; however, they may fall short in specific scenarios as they don&#x2019;t encompass the comprehensive data required for in-depth research analysis.</p>
<p>An alternative to employing spectral indices for disease identification is leveraging the full spectrum of radiation reflected and captured by hyperspectral sensors. However, given the vastness of hyperspectral datasets and their intricate processing requirements, integrating ML models with hyperspectral data for disease identification has garnered increased interest in recent years. In this sense, models such as ANN, SVM, RF, and GNB, among others, have been proposed (<xref ref-type="bibr" rid="B29">Singh et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B30">Su, 2020</xref>). In light of these facts, hyperspectral information for disease detection has been successfully utilized. However, the research often relies on datasets with limited data volume, particularly concerning the context of ML. A comprehensive and balanced dataset is essential for broad generalization when constructing a resilient ML model. However, field data collection requires considerable effort and resources, which limits data availability for analysis. Because of this, data augmentation techniques are expected to be employed to improve the overall learning procedure and performance of ML models. Data augmentation is primarily performed on imbalanced datasets, which exhibit a significant disparity in the number of data instances in each class (<xref ref-type="bibr" rid="B13">Hadad et&#xa0;al., 2009</xref>). This imbalance has consequences for the learning process by resulting in low predictive accuracy for the minority class (<xref ref-type="bibr" rid="B7">Daskalaki et&#xa0;al., 2006</xref>), as many performance measures used to guide training penalize minority classes. Rules that predict minority classes are highly specialized and have low coverage, which often causes them to be discarded in favor of more general rules. 
In addition, the noise treatment may affect the classification of minority classes, as they may be erroneously discarded as noise (<xref ref-type="bibr" rid="B24">Pulgar et&#xa0;al., 2017</xref>).</p>
<p>According to the literature review by <xref ref-type="bibr" rid="B15">Kamilaris and Prenafeta-Bold&#xfa; (2018)</xref>, 37% of the reviewed articles apply data augmentation and highlight the importance of such techniques in scientific works with small hyperspectral datasets (i.e., images). Limited resources are available concerning the refinement of hyperspectral data from spectroscopy. <xref ref-type="bibr" rid="B5">Chawla et&#xa0;al. (2002)</xref> introduced the Synthetic Minority Over-sampling Technique (SMOTE), which interpolates between minority class instances to address data imbalance. This tool augments the minority class by generating new synthetic data based on existing examples. From an agriculture perspective, researchers like <xref ref-type="bibr" rid="B19">Ma et&#xa0;al. (2019)</xref> employed SMOTE to balance the imbalanced training dataset, aiming to develop a model that distinguishes between powdery mildew and aphid infestations in winter wheat using bi-temporal Landsat-8 imagery. A recent study by <xref ref-type="bibr" rid="B9">Divakar et&#xa0;al. (2021)</xref> utilized SMOTE to classify areas affected by wilt disease in bananas.</p>
<p>Based on the above literature review and our knowledge, this technique has rarely been applied to agricultural tasks, particularly for detecting wheat yellow and brown rust. Hence, this study aims to evaluate the feasibility of differentiating cultivars affected by yellow and brown rust in durum and bread wheat using complete spectral signatures acquired through spectroscopy. Moreover, it will assess the impact of the SMOTE algorithm on the development of ML models for the accurate detection of both types of rust.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Field experiment and data acquisition</title>
<p>The field experiment was conducted in a greenhouse located at the School of Agricultural Engineering, University of Seville (37&#xb0;21&#x2032;9&#x2033; N, 5&#xb0;56&#x2032;10.5&#x2033; W; Datum: WGS84), Spain. The study was conducted on spring wheat (<italic>Triticum aestivum</italic> L.) cultivated during the 2020/2021 growing season. The experiment included three cultivars of durum wheat, namely &#x2018;Don Ricardo&#x2019;, &#x2018;Kiko Nick&#x2019;, and &#x2018;Amilcar&#x2019;, as well as three cultivars of bread wheat, specifically &#x2018;Conil&#x2019;, &#x2018;Califa&#x2019;, and &#x2018;Arthur Nick&#x2019;. These cultivars were arranged in a randomized design with six replicates for each cultivar. Half the pots were inoculated with rust races to have healthy and infected pots. Pots of bread wheat were inoculated with yellow rust (<italic>Puccinia striiformis</italic> f. sp. <italic>tritici</italic>), and pots belonging to durum wheat were inoculated with brown rust, also called leaf rust (<italic>Puccinia triticina</italic>). The inoculation occurred on days 87 and 94 after seeding (DAS) for bread and durum wheat, respectively (<xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Illustration showing the experimental design <bold>(A)</bold>, the inoculation process <bold>(B)</bold>, a detailed view of leaves with yellow rust <bold>(C)</bold>, and the position of the spectroradiometer during the measurements <bold>(D)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1392409-g001.tif"/>
</fig>
<p>In addition to the visual score evaluation, each pot was subject to three spectral measurements captured at a distance of about 0.15 meters from the plant. Before the visibility of symptoms, the spectral signature was derived from the average of the canopy. However, once the pustules became completely visible, measurements were explicitly taken from the affected areas using a portable spectroradiometer. The sensor used was the spectroradiometer (UNISPEC-DC, PP-systems, Inc., Amesbury, MA, USA), which allows the measurement of reflectance from two optical fibres, channels A and B. One channel records the incident radiation, while the other records the reflected radiation. Each channel includes a photodiode detector that covers a spectral region ranging from 310 to 1100 nm. The sensor offers a spectral resolution between 3.1 and 3.4 nm. A white reference (99% reflectance Spectralon panel) calibrated the spectroradiometer. Hyperspectral data were collected around noon under completely sunny conditions, with data collection performed for each pot at intervals of 3-4 days. Seven measurements were made on pots inoculated with yellow rust on DAS 87, 94, 98, 101, 105, 108, and 112, and six measurements were made on pots inoculated with brown rust on DAS 94, 98, 101, 105, 108, and 112.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data preprocessing</title>
<p>For each wavelength (<inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>), the spectral reflectance (<inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>&#x3bb;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) was calculated following <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<disp-formula id="eq1">
<label>(Equation 1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>&#x3bb;</mml:mi>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>%</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>L</mml:mi>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mi>L</mml:mi>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>100</mml:mn>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msubsup>
<mml:mi>L</mml:mi>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the spectral radiance reflected by the crop surface at wavelength <inline-formula>
<mml:math display="inline" id="im4">
<mml:mi>&#x3bb;</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:msubsup>
<mml:mi>L</mml:mi>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> the spectral radiance received by the crop surface at wavelength <inline-formula>
<mml:math display="inline" id="im6">
<mml:mi>&#x3bb;</mml:mi>
</mml:math>
</inline-formula>.</p>
<p>The spectral signature of each pot was obtained by calculating the mean of the three measurements taken. This resulted in 36 spectral signatures for bread wheat and 36 for durum wheat. These spectra were classified into three groups: &#x2018;Healthy&#x2019; (H) for non-inoculated plants, &#x2018;Asymptomatic Leaf&#x2019; (AL) for inoculated plants without visible symptoms, and &#x2018;Symptomatic Leaf&#x2019; (SL) for inoculated plants displaying visual symptoms. The selection of these categories was intentional, serving as target variables for prediction. Each category was meticulously crafted to include a diverse range of instances, thereby facilitating the development of accurate and robust predictive models. Subsequently, the spectra underwent standardization using the Scikit-learn package version 1.2.2 (<xref ref-type="bibr" rid="B23">Pedregosa et&#xa0;al., 2011</xref>), scaling the values from 0 to 1. Machine learning estimators often need standardization procedures as they perform optimally when features exhibit an approximately normal distribution. Following standardization, the Savitzky-Golay algorithm (<xref ref-type="bibr" rid="B10">D&#xf3;pido et&#xa0;al., 2012</xref>) was applied with the following parameters: a window frame length of 11, polynomial order of 4, and the first derivative.</p>
<p>To mitigate the substantial variance in the quantity of data entries across categories, SMOTE (Synthetic Minority Over-sampling Technique) was utilized to augment the available data. The SMOTE technique is grounded in oversampling the minority class, thereby generating synthetic data for each data point within this underrepresented class. To generate these synthetic data points, the feature vector of the sample is subtracted from its nearest neighbour. This difference is then multiplied by a random number between 0 and 1 and added to the feature vector. Thus, synthetic data points are generated along the linear segments connecting any or all nearest neighbours, chosen randomly and based on the required oversampling. This study employed a random state of 888 to ensure reproducibility. <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref> illustrates the data points for each category before and after applying the SMOTE technique. Furthermore, the proportion of actual data within each category is provided after the SMOTE procedure.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Comparative data on bread wheat and durum wheat cultivars.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="left">Categories</th>
<th valign="top" colspan="2" align="center">Bread wheat</th>
<th valign="top" colspan="2" align="center">Durum wheat</th>
</tr>
<tr>
<th valign="top" align="left">Number of actual data</th>
<th valign="top" align="left">Actual data (%)</th>
<th valign="top" align="left">Number of actual data</th>
<th valign="top" align="left">Actual data (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><bold>H</bold>
</td>
<td valign="top" align="left">125</td>
<td valign="top" align="left">100</td>
<td valign="top" align="left">107</td>
<td valign="top" align="left">100</td>
</tr>
<tr>
<td valign="top" align="left"><bold>AL</bold>
</td>
<td valign="top" align="left">44</td>
<td valign="top" align="left">35.2</td>
<td valign="top" align="left">54</td>
<td valign="top" align="left">50.46</td>
</tr>
<tr>
<td valign="top" align="left"><bold>SL</bold>
</td>
<td valign="top" align="left">64</td>
<td valign="top" align="left">51.2</td>
<td valign="top" align="left">36</td>
<td valign="top" align="left">33.64</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The table presents the number and percentage of actual data for each category: Healthy (H), Asymptomatic Leaf (AL), and Symptomatic Leaf (SL).</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Various dataset processing techniques were utilized to assess the influence of synthetic data generated by SMOTE on the development of prediction models. Throughout all scenarios, the H category was solely composed of actual data. The distinct processing methods are as follows:</p>
<list list-type="simple">
<list-item>
<p>&#x2022; No SMOTE was applied; no synthetic data was introduced.</p>
</list-item>
<list-item>
<p>&#x2022; SMOTE was applied to the entire data set: synthetic data were introduced into the training, testing, and second validation sets.</p>
</list-item>
<list-item>
<p>&#x2022; SMOTE was applied to the training set only: the testing and second validation sets consisted exclusively of actual data.</p>
</list-item>
</list>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Training of ML models</title>
<p>After the preprocessing step, the dataset was split into three parts: 30% for validation, 63% for training, and 7% for testing the models. The flowchart (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>) provides the workflow associated with the different stages involved in disease detection.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Workflow of hyperspectral data processing for disease classification in wheat. The process begins with hyperspectral data acquisition, followed by data pre-processing, including standardization and Savitzky-Golay filtering, with increased data points. The dataset is split into 63% for training, 30% for validation, and 7% for testing. Subsequently, the training of classification models is conducted, culminating in the evaluation of the model&#x2019;s performance.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1392409-g002.tif"/>
</fig>
<p>Determining the best ML model for classification purposes is a challenging task, and often, the optimal decision is made through trial and error (<xref ref-type="bibr" rid="B14">Jagtap et&#xa0;al., 2022</xref>). This study implemented four models with the scikit-learn library: ANN, SVM, RF, and GNB. The RF and GNB models were configured with default parameters. A second-degree polynomial kernel was employed for the SVM model, with an independent term value of 2 in the kernel function.</p>
<p>The development of the ANN entailed utilizing RandomizedSearchCV to optimize the parameter settings. A total of 50 iterations were performed, with a random state set to 42. The parameters considered during the optimization process were alpha, hidden layer sizes, and learning rate init. For yellow rust, alpha was set to 0.0001, hidden layer sizes were 20 and 20, and the learning rate was set to 0.001. Conversely, alpha was set to 0.1 for brown rust, the hidden layer size was 30, and the learning rate was set to 0.01. The solver employed for the yellow rust dataset was Adam, while for brown rust, LBFGS was selected due to its better suitability for the data structure. All other parameters retained the default configuration of scikit-learn. The ANN models developed using the dataset without the SMOTE application served as a reference because they achieved the highest accuracy results (see <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>F1-scores achieved by the SVM (Support Vector Machine) model for wheat disease classification are presented for the categories Healthy (H), Asymptomatic Leaf (AL), and Symptomatic Leaf (SL) across datasets for both yellow rust and brown rust.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="left">Categories</th>
<th valign="top" colspan="3" align="center">Yellow rust</th>
<th valign="top" colspan="3" align="center">Brown rust</th>
</tr>
<tr>
<th valign="top" align="center">Without SMOTE</th>
<th valign="top" align="center">SMOTE</th>
<th valign="top" align="center">SMOTE on training</th>
<th valign="top" align="center">Without SMOTE</th>
<th valign="top" align="center">SMOTE</th>
<th valign="top" align="center">SMOTE on training</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center"><bold>H</bold>
</td>
<td valign="top" align="center">0.75</td>
<td valign="top" align="center">0.77</td>
<td valign="top" align="center">0.68</td>
<td valign="top" align="center">0.70</td>
<td valign="top" align="center">0.64</td>
<td valign="top" align="center">0.74</td>
</tr>
<tr>
<td valign="top" align="center"><bold>AL</bold>
</td>
<td valign="top" align="center">0.54</td>
<td valign="top" align="center">0.87</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">0.46</td>
<td valign="top" align="center">0.75</td>
<td valign="top" align="center">0.54</td>
</tr>
<tr>
<td valign="top" align="center"><bold>SL</bold>
</td>
<td valign="top" align="center">0.67</td>
<td valign="top" align="center">0.89</td>
<td valign="top" align="center">0.63</td>
<td valign="top" align="center">0.29</td>
<td valign="top" align="center">0.94</td>
<td valign="top" align="center">0.44</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The table compares model performance without using SMOTE, with SMOTE, and with SMOTE applied during the training phase.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Metrics for model evaluation</title>
<p>The data processing was conducted using Google Colaboratory, which provides the necessary Python environment and libraries for data analysis and visualization. Regarding statistical assessment, the classification models were compared based on their network classification accuracy. Accuracy (<xref ref-type="disp-formula" rid="eq2">Equation 2</xref>) quantifies the percentage of instances in which the model has made correct predictions, and it is defined as follows:</p>
<disp-formula id="eq2">
<label>(Equation 2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mtext>Accuracy</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>Number&#xa0;of&#xa0;correct&#xa0;predictions</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>Total&#xa0;number&#xa0;of&#xa0;predictions</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>For each category with balanced data, evaluation was performed using the F1_score (<xref ref-type="disp-formula" rid="eq3">Equation 3</xref>) derived from the confusion matrix, and it is defined as:</p>
<disp-formula id="eq3">
<label>(Equation 3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mtext>F</mml:mtext>
<mml:mn>1</mml:mn>
<mml:mo>_</mml:mo>
<mml:mtext>score</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>precision</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>recall</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>precision</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>recall</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where precision (<xref ref-type="disp-formula" rid="eq4">Equation 4</xref>) and recall (<xref ref-type="disp-formula" rid="eq5">Equation 5</xref>) are defined as follows:</p>
<disp-formula id="eq4">
<label>(Equation 4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>FP</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq5">
<label>(Equation 5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mtext>Recall</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>FN</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where TP represents True Positive, FP stands for False Positive, and FN represents False Negative. For models trained with imbalanced categories, precision was employed for their evaluation.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Spectral reflectance analysis</title>
<p>As can be observed in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>, the mean values can vary between categories, especially for yellow rust (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3A</bold></xref>). However, in brown rust (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3B</bold></xref>), the mean values exhibit a higher degree of overlap between categories. Overall, the mean reflectance values obtained for brown rust are higher than for yellow rust. In both cases, the most significant overlap occurs in the visible spectrum region, although it also occurs between the &#x201c;H&#x201d; and &#x201c;AL&#x201d; categories for brown rust. Both types of rust show considerable standard deviations, leading to significant overlap across all categories. To address this issue, specific classification models have been developed for each rust type to enhance accuracy in categorization. Notably, for yellow rust, the mean value of the healthy category exceeds that of the asymptomatic leaf category, while the reverse is true for brown rust.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Reflectance spectra are presented for healthy (H), asymptomatic (AL), and symptomatic leaf (SL) categories, illustrating the spectra of wheat leaves affected by yellow rust <bold>(A)</bold> and brown rust <bold>(B)</bold>. Mean reflectance values and standard deviations have been computed for these predefined categories. The spectra are displayed in an unnormalized format.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1392409-g003.tif"/>
</fig>
<p>Upon close examination of each plot at the rust level, it is observed that for yellow rust (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3A</bold></xref>), the category with the lowest mean reflectance value is the asymptomatic leaf, followed by the healthy category. In contrast, the symptomatic leaf category displays the highest mean reflectance value. This trend remains consistent in the visible spectrum (400-700 nm) and NIR regions (700-800 nm). All categories exhibit a standard deviation wide enough to cause overlap, although the healthy category displays the highest variability. In the visible spectrum region, the similarity in mean reflectance values between the healthy and asymptomatic leaf categories is noteworthy, with the symptomatic category achieving a higher mean value than both. In the NIR region, the mean reflectance difference increases between the healthy and asymptomatic leaf categories while it decreases between the healthy and symptomatic leaf categories.</p>
<p>
<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3B</bold></xref> presents the mean reflectance and standard deviation values for the various categories of wheat leaves infested with brown rust. The mean reflectance values and standard deviations are similar across the categories in the visible spectrum region. However, in the NIR region, there is a noticeable increase in the mean reflectance value for the symptomatic leaf category compared to the others, indicating that the spectral signatures of H and asymptomatic plants are very similar, which presents challenges in early detection. Similar to yellow rust, the data obtained for brown rust also exhibit significant standard deviations, resulting in an overlap among categories.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Models&#x2019; performance</title>
<p>The classification models were constructed using the training dataset, encompassing labelled data from all wheat varieties. The models were fine-tuned using the validation dataset. Subsequently, the developed models were tested for performance using the test set, comprising spectral data from all varieties within each wheat type. The corresponding accuracy (%) of each model used in this study, based on their respective datasets, is presented in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Performance comparison of Machine Learning (ML) models for yellow and brown rust classification.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="left">Models</th>
<th valign="top" colspan="3" align="center">Yellow rust</th>
<th valign="top" colspan="3" align="center">Brown rust</th>
</tr>
<tr>
<th valign="top" align="left">Without SMOTE</th>
<th valign="top" align="left">SMOTE</th>
<th valign="top" align="left">SMOTE on training</th>
<th valign="top" align="left">Without SMOTE</th>
<th valign="top" align="left">SMOTE</th>
<th valign="top" align="left">SMOTE on training</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><bold>ANN</bold>
</td>
<td valign="top" align="left">65.71</td>
<td valign="top" align="left">76</td>
<td valign="top" align="left">60</td>
<td valign="top" align="left">57</td>
<td valign="top" align="left">76.3</td>
<td valign="top" align="left">55</td>
</tr>
<tr>
<td valign="top" align="left"><bold>SVM</bold>
</td>
<td valign="top" align="left">68.6</td>
<td valign="top" align="left">85</td>
<td valign="top" align="left">62.86</td>
<td valign="top" align="left">58</td>
<td valign="top" align="left">78.35</td>
<td valign="top" align="left">63</td>
</tr>
<tr>
<td valign="top" align="left"><bold>RF</bold>
</td>
<td valign="top" align="left">70</td>
<td valign="top" align="left">81.5</td>
<td valign="top" align="left">68.5</td>
<td valign="top" align="left">53</td>
<td valign="top" align="left">73.2</td>
<td valign="top" align="left">55</td>
</tr>
<tr>
<td valign="top" align="left"><bold>GNB</bold>
</td>
<td valign="top" align="left">64</td>
<td valign="top" align="left">61</td>
<td valign="top" align="left">57</td>
<td valign="top" align="left">37</td>
<td valign="top" align="left">60</td>
<td valign="top" align="left">38</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The table presents the accuracy percentages of Artificial Neural Networks (ANN), Support Vector Machines (SVM), Random Forests (RF), and Gaussian Naive Bayes (GNB) with and without the application of Synthetic Minority Over-sampling Technique (SMOTE) during training.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The results show that the model&#x2019;s accuracy is consistently higher for classifying yellow rust than brown rust. Among the models, the GNB model displayed the least accuracy in both yellow and brown rust contexts. Consequently, our analysis will primarily concentrate on the outcomes achieved by the ANN, SVM, and RF models.</p>
<p>The SVM model obtained the highest accuracy for the dataset where SMOTE was not applied, followed by the ANN model for brown rust and the RF model for yellow rust. Similarly, in cases where the dataset was augmented using SMOTE, the highest accuracy values were obtained by SVM models. The accuracy achieved in this dataset is the highest among all models compared to the results obtained in the remaining datasets. Furthermore, these models also excel in the dataset where the SMOTE algorithm was exclusively applied during training. Nevertheless, it is noteworthy that the accuracy obtained in the SMOTE dataset during training decreased compared to the datasets where SMOTE was and was not applied for all models. However, the exception to this trend is observed for the SVM and RF models in the case of brown rust. In the SVM model, the accuracy of the model trained with the original dataset increased by five percentage points when SMOTE was applied in training. For the RF model, this increase was two percentage points.</p>
<p>
<xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref> shows the F1-scores achieved by the ANN model for yellow and brown rust prediction. Regarding yellow rust, it can be observed that in the dataset without SMOTE, the &#x201c;H&#x201d; category, characterized by a more substantial number of data points, achieved higher values. In contrast, the &#x201c;AL&#x201d; category displayed the lowest value. It is worth noting that this category was composed of fewer data than the others. In the dataset where the algorithm was fully implemented, notable enhancements were observed in the &#x201c;AL&#x201d; and &#x201c;SL&#x201d; categories, which incorporated synthetic data. However, the &#x201c;H&#x201d; category, comprised solely of actual data, obtained a lower score than the dataset where SMOTE was not applied. Conversely, in the dataset where SMOTE was only applied to the training dataset, it was observed that the &#x201c;H&#x201d; category maintained an outcome similar to that of the dataset with complete SMOTE application and a decrease relative to the original dataset. However, the &#x201c;SL&#x201d; and &#x201c;AL&#x201d; categories obtained similar and slightly higher F1-scores than the dataset where the SMOTE algorithm was not applied.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>F1-scores achieved by the ANN (Artificial Neural Network) model for wheat disease classification are presented for the categories Healthy (H), asymptomatic leaf (AL), and Symptomatic Leaf (SL) across datasets for both yellow rust and brown rust.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" colspan="3" align="left">Yellow rust</th>
<th valign="top" colspan="3" align="left">Brown rust</th>
</tr>
<tr>
<th valign="top" align="left">Categories</th>
<th valign="top" align="left">Without SMOTE</th>
<th valign="top" align="left">SMOTE</th>
<th valign="top" align="left">SMOTE on training</th>
<th valign="top" align="left">Without SMOTE</th>
<th valign="top" align="left">SMOTE</th>
<th valign="top" align="left">SMOTE on training</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">H</td>
<td valign="top" align="left">0.73</td>
<td valign="top" align="left">0.63</td>
<td valign="top" align="left">0.65</td>
<td valign="top" align="left">0.66</td>
<td valign="top" align="left">0.66</td>
<td valign="top" align="left">0.67</td>
</tr>
<tr>
<td valign="top" align="left">AL</td>
<td valign="top" align="left">0.45</td>
<td valign="top" align="left">0.77</td>
<td valign="top" align="left">0.50</td>
<td valign="top" align="left">0.47</td>
<td valign="top" align="left">0.81</td>
<td valign="top" align="left">0.61</td>
</tr>
<tr>
<td valign="top" align="left">SL</td>
<td valign="top" align="left">0.62</td>
<td valign="top" align="left">0.87</td>
<td valign="top" align="left">0.59</td>
<td valign="top" align="left">0.37</td>
<td valign="top" align="left">0.82</td>
<td valign="top" align="left">0.63</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The table compares model performance without using SMOTE, with SMOTE, and with SMOTE applied during the training phase.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>A similar trend is observed for brown rust as for yellow rust. The category &#x201c;H&#x201d; results were consistent across all three datasets. In the categories &#x201c;AL&#x201d; and &#x201c;SL,&#x201d; higher F1-scores were obtained in the dataset where SMOTE was fully applied. In contrast to the trend observed in yellow rust, in the dataset where SMOTE was only used in the training, categories SL and AL increased their accuracy by 26 and 14 points, respectively, compared to the dataset where SMOTE was not applied. The F1-scores obtained by the SVM models show behaviour similar to that of the ANN model (<xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>). The &#x201c;H&#x201d; category demonstrates consistent performance across all three datasets with slight variations. For the &#x201c;AL&#x201d; and &#x201c;SL&#x201d; categories, a notable enhancement is observed when SMOTE is applied to the entire dataset, contrasting the performance of the non-SMOTE dataset. However, in the dataset where SMOTE was solely used during training, the accuracy obtained decreases by 3-4 percentage points for yellow rust and increases for brown rust. In the latter case, the 15-point increase in the &#x201c;SL&#x201d; category is worth noting compared to the original dataset.</p>
<p>
<xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> displays the F1-score results of the RF model. In the case of yellow rust, the &#x201c;H&#x201d; category maintains consistency across all datasets, with a slight advantage in the non-SMOTE dataset. The &#x201c;AL&#x201d; category shows improvement with SMOTE applied during training, while &#x201c;SL&#x201d; remains unchanged. A similar pattern is observed for the &#x201c;H&#x201d; category in the context of brown rust. Interestingly, the dataset containing actual data yielded the lowest values for &#x201c;AL&#x201d; and &#x201c;SL&#x201d;, but the application of SMOTE during training increased their values by 12 and 20 points, respectively.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>F1-scores achieved by the RF (Random Forest) model for wheat disease classification are presented for the categories Healthy (H), asymptomatic leaf (AL), and Symptomatic Leaf (SL) across datasets for both yellow rust and brown rust.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="left">Categories</th>
<th valign="top" colspan="3" align="center">Yellow rust</th>
<th valign="top" colspan="3" align="center">Brown rust</th>
</tr>
<tr>
<th valign="top" align="center">Without SMOTE</th>
<th valign="top" align="center">SMOTE</th>
<th valign="top" align="center">SMOTE on training</th>
<th valign="top" align="center">Without SMOTE</th>
<th valign="top" align="center">SMOTE</th>
<th valign="top" align="center">SMOTE on training</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center"><bold>H</bold>
</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">0.70</td>
<td valign="top" align="center">0.73</td>
<td valign="top" align="center">0.66</td>
<td valign="top" align="center">0.59</td>
<td valign="top" align="center">0.64</td>
</tr>
<tr>
<td valign="top" align="center"><bold>AL</bold>
</td>
<td valign="top" align="center">0.32</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.50</td>
<td valign="top" align="center">0.34</td>
<td valign="top" align="center">0.71</td>
<td valign="top" align="center">0.46</td>
</tr>
<tr>
<td valign="top" align="center"><bold>SL</bold>
</td>
<td valign="top" align="center">0.71</td>
<td valign="top" align="center">0.88</td>
<td valign="top" align="center">0.74</td>
<td valign="top" align="center">0.17</td>
<td valign="top" align="center">0.89</td>
<td valign="top" align="center">0.37</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The table compares model performance without using SMOTE, with SMOTE, and with SMOTE applied during the training phase.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>When comparing the results obtained for each model, it can be observed that the best accuracies are achieved by the models that used datasets augmented with the SMOTE algorithm. However, the presence of synthetic data in the test dataset may raise concerns about the reliability of the results. Regarding the dataset for the category consisting solely of actual data, RF was the best model for yellow rust classification. The SVM model performed better for brown rust when the same dataset was used.</p>
<p>The SVM model achieved the highest F1-score for yellow rust in the &#x201c;AL&#x201d; category. In the case of brown rust, the best model was ANN for the same category. Conversely, the highest accuracy for the &#x201c;AL&#x201d; category in yellow rust was found in the original dataset, and for brown rust, it occurred in the dataset where SMOTE was applied during training. Finally, the best F1-score for the &#x201c;SL&#x201d; category in yellow rust was achieved by the RF model, and for brown rust, it was the SVM model, both using the dataset with SMOTE applied during training.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Confusion matrix</title>
<p>For the set of confusion matrices shown in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>, it was observed that both models had a similar total number of errors in the dataset where SMOTE was not applied. However, the number of classification errors by categories differs significantly between the two models. In the SVM model, the category with the highest number of errors was &#x201c;H&#x201d; particularly when distinguishing it from the &#x201c;AL&#x201d; category. Notably, there were many mistakes in classifying the &#x201c;SL&#x201d; and &#x201c;H&#x201d; categories. On the other hand, in the RF model, there is a drastic decrease in the error rate for classifying the &#x201c;H&#x201d; category, representing an improvement compared to the SVM model. However, an increase in misclassifications in the &#x201c;AL&#x201d; and &#x201c;SL&#x201d; categories was observed, especially when distinguishing them from the &#x201c;H&#x201d; category. In the dataset where SMOTE was applied entirely, both models exhibited the highest number of correct predictions in the categories &#x201c;AL&#x201d; and &#x201c;SL,&#x201d; which included synthetic data. However, most errors occurred in the category &#x201c;H&#x201d;, consisting entirely of actual data, particularly in distinguishing between &#x201c;H&#x201d; and &#x201c;AL&#x201d;. Notably, the number of errors in this distinction is higher in this dataset than in the original data.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Confusion matrices were obtained by SVM (Support Vector Machine) and RF (Random Forest) models to predict &#x201c;H&#x201d;, &#x201c;SL&#x201d;, and &#x201c;AL&#x201d; yellow rust categories. SVM for the dataset without SMOTE <bold>(A)</bold>, RF for the dataset without SMOTE <bold>(B)</bold>, SVM for the dataset with SMOTE <bold>(C)</bold>, RF for the dataset with SMOTE <bold>(D)</bold>, SVM for the dataset with SMOTE on training <bold>(E)</bold>, RF for the dataset with SMOTE on training <bold>(F)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1392409-g004.tif"/>
</fig>
<p>Finally, in the dataset where SMOTE was exclusively applied to the training set, a similar pattern was observed compared to the dataset without SMOTE. The category with the highest number of correct predictions was &#x201c;H,&#x201d; which showed better results with RF than SVM. For this category, the SVM model exhibited a more significant number of incorrect predictions with the &#x201c;SL&#x201d; category. On the other hand, for the &#x201c;AL&#x201d; category in the SVM model, the number of errors was balanced with the &#x201c;SL&#x201d; and &#x201c;H&#x201d; categories. However, in the RF model, it is observed that most incorrect predictions were made mainly concerning the &#x201c;H&#x201d; category.</p>
<p>Based on the data obtained in <xref ref-type="fig" rid="f4"><bold>Figures&#xa0;4A, B, E, F</bold></xref>, it is evident that the RF model is better at classifying the predominant category &#x201c;H&#x201d; consisting exclusively of real data. All models exhibit similar behaviour regarding the &#x201c;SL&#x201d; category, whose spectral characteristics differ the most from the other categories. Finally, for the &#x201c;AL&#x201d; category, it is noteworthy that both SVM and RF models perform well when SMOTE is applied during training. However, they misclassify instances differently, with the RF model standing out. This is attributed to its ability, within the margin of error, to more accurately approximate two categories with similar spectral characteristics, namely &#x201c;AL&#x201d; and &#x201c;H&#x201d;.</p>
<p>The confusion matrices obtained with the SVM model for each dataset with the highest accuracy for brown rust are displayed in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Confusion matrices obtained by the SVM (Support Vector Machine) model to predict Healthy (H), Symptomatic Leaf (SL), and Asymptomatic Leaf (AL) brown rust categories. SVM for the dataset without SMOTE <bold>(A)</bold>, SVM for the dataset with SMOTE <bold>(B)</bold>, and SVM for the dataset with SMOTE on training <bold>(C)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1392409-g005.tif"/>
</fig>
<p>For the set of confusion matrices shown in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>, it was observed that, in the dataset in which SMOTE was not applied, the category with the highest number of correct predictions is &#x2018;H.&#x2019; Within this category, it is noteworthy that the highest number of errors was made with the &#x2018;AL&#x2019; category. The same pattern is repeated for the &#x2018;AL&#x2019; category, with all incorrect predictions made with the &#x2018;H&#x2019; category. The &#x2018;SL&#x2019; category showed the most significant errors, evenly distributed among the remaining categories.</p>
<p>The same behaviour was observed for yellow rust in the dataset where SMOTE was applied entirely. Finally, the trend observed for the &#x2018;H&#x2019; category without data augmentation repeats itself in the dataset where SMOTE was exclusively applied to the training set. However, for the &#x2018;SL&#x2019; and &#x2018;AL&#x2019; categories, there is a slight increase in the number of correct predictions, and the error ratio remains consistent compared to the original dataset. Therefore, this latter model demonstrates the highest efficiency in category distinction, although it also shows notable deficiencies in classifying the &#x2018;SL&#x2019; category.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>This study examines the spectral reflectance signatures for three different disease categories. It explores the application of the SMOTE algorithm across various hyperspectral datasets for predicting wheat rust, specifically focusing on its impact on model accuracy and F1-scores.</p>
<p>Regarding leaf rust classification, the present study yields results similar to those obtained by <xref ref-type="bibr" rid="B26">Ruan et&#xa0;al. (2021)</xref>, who achieved an accuracy of 86.2% using an SVM model to classify healthy and rust-infected wheat leaves. While they also employed SMOTE to balance the data, they did not examine its effects. However, our findings demonstrate a significant improvement in model performance by applying the SMOTE algorithm. Specifically, we observed accuracy improvements ranging from 16% to 20% for yellow rust and 11% to 20% for brown rust when SMOTE was applied across the entire dataset. These results align with previous studies, such as those by <xref ref-type="bibr" rid="B33">U&#x11f;uz and Uysal (2021)</xref> and <xref ref-type="bibr" rid="B28">Singh and Arora (2020)</xref>, which utilized a dataset of 3400 hyperspectral images to distinguish between two diseases and healthy plants across three categories. Nevertheless, our study offers a detailed analysis, particularly concerning the category comprised solely of original data, where no accuracy improvement was noted. This highlights the complex effects of data augmentation techniques like SMOTE on model accuracy. Similarly, <xref ref-type="bibr" rid="B30">Su et&#xa0;al. (2019)</xref> reported a 3.36% improvement in model performance when comparing outcomes on imbalanced versus balanced standard datasets, underscoring the beneficial impact of data-balancing techniques.</p>
<p>In contrast to our findings, <xref ref-type="bibr" rid="B28">Singh and Arora (2020)</xref> reported an increase in overall accuracy across all categories when applying the SMOTE algorithm, with 75% of their dataset comprising synthetic data. This discrepancy highlights the varied outcomes that can occur based on the dataset&#x2019;s composition, particularly the proportion of synthetic data introduced. This variability in results highlights the complex relationship between dataset characteristics and the efficacy of data augmentation techniques, prompting a more thorough investigation into the factors influencing model performance. Furthermore, our study adds to the extensive literature on leveraging the complete spectrum of spectroradiometers for plant disease detection. Works such as <xref ref-type="bibr" rid="B20">Naidu et&#xa0;al. (2009)</xref> and <xref ref-type="bibr" rid="B17">Khosrokhani and Nasr (2022)</xref> have demonstrated the potential of combining spectral data with machine learning models, yielding high accuracy rates. Our findings are consistent with these studies, particularly in revealing more significant classification errors for categories with analogous spectral characteristics.</p>
<p>
<xref ref-type="bibr" rid="B31">Sun et&#xa0;al. (2024)</xref> utilized SMOTE to assess the severity of peanut blight. They concluded that while SMOTE serves as a valuable approach for tackling data imbalance, it is important to mention that SMOTE generates synthetic samples containing noise. This observation could explain why, in our study, no notable differences were found in the &#x201c;H&#x201d; class upon applying the algorithm. The spectra of healthy leaves closely resemble those generated by SMOTE. However, the algorithm exhibited better performance for the other classes, as it is more common to encounter noise in infected leaves, primarily due to pustules.</p>
<p>In conclusion, our research enhances the understanding of the role of data augmentation in machine learning for plant disease detection. It underscores the importance of large, diverse datasets and the careful consideration of the balance between actual and synthetic data. The choice of machine learning models should be tailored to the specific characteristics of the dataset and the disease under investigation. This study contributes to academic knowledge and holds practical implications in agricultural technology, especially in developing robust, accurate systems for early disease detection and management.</p>
</sec>
<sec id="s5" sec-type="conclusion">
<label>5</label>
<title>Conclusion</title>
<p>This study investigated the efficacy of various ML models in detecting yellow and brown rust in wheat crops using hyperspectral data, emphasizing the role of SMOTE in enhancing model accuracy. SMOTE significantly improved model accuracy, particularly in training datasets, especially for minority categories with synthetic data. However, this might affect real-world applicability due to potential accuracy distortion. The RF model showed 70% accuracy for yellow rust using only actual data. The SVM model achieved 63% accuracy for brown rust when SMOTE was applied to the training set, highlighting these models&#x2019; ability to discern features effectively. However, similarity in spectral characteristics between specific categories, like &#x2018;H&#x2019; and &#x2018;AL&#x2019;, posed challenges. The application of SMOTE generally decreased the performance of the &#x2018;H&#x2019; class in both RF and SVM models. Still, it improved accuracy for minority classes &#x2018;AL&#x2019; and &#x2018;SL&#x2019;, achieving 61% accuracy for the &#x2018;AL&#x2019; category in brown rust detection. These findings underline the importance of data augmentation for enhancing category-specific accuracy and advocate for further research into data processing and augmentation techniques to refine ML model performance in hyperspectral data analysis.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>CC: Methodology, Software, Writing &#x2013; original draft. OA: Data curation, Formal analysis, Investigation, Writing &#x2013; original draft. JR: Data curation, Writing &#x2013; original draft. GE: Conceptualization, Funding acquisition, Project administration, Writing &#x2013; review &amp; editing. MP: Conceptualization, Funding acquisition, Project administration, Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. The European Union and the Ministry of Science and Innovation funded this study. Project reference PID2021-125080OB-100.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>This work was performed under the research group AGR278-PAIDI &#x201c;Smart Biosystem Laboratory&#x201d;, and we thank all members for their support in the field trials.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author(s) declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashourloo</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Mobasheri</surname> <given-names>M. R.</given-names>
</name>
<name>
<surname>Huete</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Evaluating the effect of different wheat rust disease symptoms on vegetation indices using hyperspectral measurements</article-title>. <source>Remote Sens.</source> <volume>6</volume>, <fpage>5107</fpage>&#x2013;<lpage>5123</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs6065107</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bauriegel</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Herppich</surname> <given-names>W. B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Hyperspectral and chlorophyll fluorescence imaging for early detection of plant diseases, with special reference to Fusarium spec. infections on wheat</article-title>. <source>Agriculture</source> <volume>4</volume> (<issue>1</issue>), <fpage>32</fpage>&#x2013;<lpage>57</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture4010032</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Beard</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Loughman</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Jayasena</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2005</year>). <source>Managing stripe rust and leaf rust of wheat</source>. (<publisher-loc>Western Australia</publisher-loc>: <publisher-name>Farmnote. Department of Agriculture, Government of Western Australia</publisher-name>). Available at: <uri xlink:href="https://www.agric.wa.gov.au/grains-research-development/managing-stripe-rust-and-leaf-rust-wheat-western-Australia?page=0%2C1">https://www.agric.wa.gov.au/grains-research-development/managing-stripe-rust-and-leaf-rust-wheat-western-Australia?page=0%2C1</uri>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bock</surname> <given-names>C. H.</given-names>
</name>
<name>
<surname>Poole</surname> <given-names>G. H.</given-names>
</name>
<name>
<surname>Parker</surname> <given-names>P. E.</given-names>
</name>
<name>
<surname>Gottwald</surname> <given-names>T. R.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Plant disease severity estimated visually, by digital photography and image analysis, and by hyperspectral imaging</article-title>. <source>Crit. Rev. Plant Sci.</source> <volume>29</volume>, <fpage>59</fpage>&#x2013;<lpage>107</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/07352681003617285</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chawla</surname> <given-names>N. V.</given-names>
</name>
<name>
<surname>Bowyer</surname> <given-names>K. W.</given-names>
</name>
<name>
<surname>Hall</surname> <given-names>L. O.</given-names>
</name>
<name>
<surname>Kegelmeyer</surname> <given-names>W. P.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>SMOTE: Synthetic minority over-sampling technique</article-title>. <source>J. Artif. Intell. Res.</source> <volume>16</volume>, <fpage>321</fpage>&#x2013;<lpage>357</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Clevers</surname> <given-names>J. G. P. W.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>The use of imaging spectrometry for agricultural applications</article-title>. <source>ISPRS Journal of Photogrammetry and Remote Sensing</source> <volume>54</volume> (<issue>5-6</issue>), <fpage>299</fpage>&#x2013;<lpage>304</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0924-2716(99)00033-7</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Daskalaki</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kopanas</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Avouris</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Evaluation of classifiers for an uneven class distribution problem</article-title>. <source>Appl. Artif. Intel.</source> <volume>20</volume>, <fpage>381</fpage>&#x2013;<lpage>417</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/08839510500313653</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Devadas</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Lamb</surname> <given-names>D. W.</given-names>
</name>
<name>
<surname>Simpfendorfer</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Evaluating ten spectral vegetation indices for identifying rust infection in individual wheat leaves</article-title>. <source>Precis. Agric.</source> <volume>10</volume>, <fpage>459</fpage>&#x2013;<lpage>470</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11119-008-9100-2</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Divakar</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Bhattacharjee</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Priyadarshini</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Smote-DL: A Deep Learning Based Plant Disease Detection Method</article-title>. In <conf-name>Proceedings of the 2021 6th International Conference for Convergence in Technology (I2CT)</conf-name>, <conf-loc>Maharashtra, India</conf-loc>. <fpage>1</fpage>&#x2013;<lpage>6</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/I2CT51068.2021.9417920</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dopido</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Marpu</surname> <given-names>P. R.</given-names>
</name>
<name>
<surname>Plaza</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bioucas Dias</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Benediktsson</surname> <given-names>J. A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Semisupervised Self-Learning for Hyperspectral Image Classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>51</volume>, <fpage>4032</fpage>&#x2013;<lpage>4044</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2012.2228275</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Figueroa</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hammond-Kosack</surname> <given-names>K. E.</given-names>
</name>
<name>
<surname>Solomon</surname> <given-names>P. S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A review of wheat diseases&#x2014;a field perspective</article-title>. <source>Mol. Plant Pathol.</source> <volume>19</volume>, <fpage>1523</fpage>&#x2013;<lpage>1536</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/mpp.12618</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giovos</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Tassopoulos</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Kalivas</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Lougkos</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Priovolou</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Remote sensing vegetation indices in viticulture: A critical review</article-title>. <source>Agriculture</source> <volume>11</volume>, <elocation-id>457</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture11050457</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hadad</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Evin</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Drozdowicz</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Modelo para el tratamiento de datos desbalanceados basado en redes neuronales autoorganizadas</article-title>. In <conf-name>XVII Congreso Argentino de Bioingenier&#xed;a</conf-name>, <conf-loc>Rosario, Santa Fe</conf-loc>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jagtap</surname> <given-names>S. T.</given-names>
</name>
<name>
<surname>Phasinam</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Kassanuk</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Jha</surname> <given-names>S. S.</given-names>
</name>
<name>
<surname>Ghosh</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Thakar</surname> <given-names>C. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Towards application of various machine learning techniques in agriculture</article-title>. <source>Materials Today: Proc.</source> <volume>51</volume>, <fpage>793</fpage>&#x2013;<lpage>797</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.matpr.2021.06.236</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kamilaris</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Prenafeta-Bold&#xfa;</surname> <given-names>F. X.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Deep learning in agriculture: A survey</article-title>. <source>Comput. Electron. Agric.</source> <volume>147</volume>, <fpage>70</fpage>&#x2013;<lpage>90</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2018.02.016</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khanal</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kc</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Fulton</surname> <given-names>J. P.</given-names>
</name>
<name>
<surname>Shearer</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ozkan</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Remote sensing in agriculture&#x2014;accomplishments, limitations, and opportunities</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <elocation-id>3783</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs12223783</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khosrokhani</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Nasr</surname> <given-names>A. H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Applications of the remote sensing technology to detect and monitor the rust disease in the wheat &#x2013; a literature review</article-title>. <source>Geocarto Int.</source> <volume>37</volume>, <fpage>13268</fpage>&#x2013;<lpage>13290</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/10106049.2022.2076922</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhinyan</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Haibo</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yuntao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Xi</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Habitat monitoring to evaluate crop disease and pest distributions based on multi-source satellite remote sensing imagery</article-title>. <source>Optik Optics</source> <volume>145</volume>, <fpage>66</fpage>&#x2013;<lpage>73</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ijleo.2017.06.071</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Jing</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Integrating growth and environmental parameters to discriminate powdery mildew and aphid of winter wheat using Bi-temporal landsat-8 imagery</article-title>. <source>Remote Sens.</source> <volume>11</volume>, <issue>7</issue>, <fpage>846</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs11070846</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Naidu</surname> <given-names>R. A.</given-names>
</name>
<name>
<surname>Perry</surname> <given-names>E. M.</given-names>
</name>
<name>
<surname>Pierce</surname> <given-names>F. J.</given-names>
</name>
<name>
<surname>Mekuria</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>The potential of spectral reflectance technique for the detection of grapevine leafroll-associated virus-3 in two red-berried wine grape cultivars</article-title>. <source>Comput. Electron. Agric.</source> <volume>66</volume>, <fpage>38</fpage>&#x2013;<lpage>45</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2008.11.007</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Orchi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sadik</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Khaldoun</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>On using artificial intelligence and the internet of things for crop disease detection: A contemporary survey</article-title>. <source>Agriculture</source> <volume>12</volume>, <elocation-id>9</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture12010009</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paoletti</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Haut</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Plaza</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Plaza</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Deep learning classifiers for hyperspectral imagery: A review</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>158</volume>, <fpage>279</fpage>&#x2013;<lpage>317</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isprsjprs.2019.09.006</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedregosa</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Varoquaux</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Gramfort</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Michel</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Thirion</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Grisel</surname> <given-names>O.</given-names>
</name>
<etal/>
</person-group>. (<year>2011</year>). <article-title>Scikit-learn: machine learning in python</article-title>. <source>J. Mach. Learn. Res.</source> <volume>12</volume>, <fpage>2825</fpage>&#x2013;<lpage>2830</lpage>.</citation>
</ref>
<ref id="B24">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Pulgar</surname> <given-names>F. J.</given-names>
</name>
<name>
<surname>Rivera</surname> <given-names>A. J.</given-names>
</name>
<name>
<surname>Charte</surname> <given-names>F.</given-names>
</name>
<name>
<surname>del Jesus</surname> <given-names>M. J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>On the impact of imbalanced data in convolutional neural networks performance</article-title>. In <conf-name>Proceedings of the International Conference on Hybrid Artificial Intelligence Systems</conf-name>, <conf-loc>La Rioja, Spain</conf-loc>. <fpage>220</fpage>&#x2013;<lpage>232</lpage>.</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roberts</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Power</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chapman</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chandra</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Cozzolino</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A brief update on the advantages, applications, and limitations of hyperspectral and chemical imaging in food authentication</article-title>. <source>Appl. Sci.</source> <volume>8</volume>, <elocation-id>505</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app8040505</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ruan</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ye</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Prediction of wheat stripe rust occurrence with time series sentinel-2 images</article-title>. <source>Agriculture</source> <volume>11</volume>, <elocation-id>1079</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture11111079</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Salvagiotti</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Castellar&#xed;n</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Pedrol</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Gonz&#xe1;lez</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Incremona</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Efecto de la fertilizaci&#xf3;n con potasio y cloro sobre el rendimiento y severidad de las enfermedades foliares en trigo</article-title>. <source>Informaciones Agron&#xf3;micas del Cono Sur</source> <volume>26</volume>, <fpage>16</fpage>&#x2013;<lpage>19</lpage>. Available at: <uri xlink:href="https://fertilizar.org.ar/wp-content/uploads/2005/06/Salvagiotti-Trigo-Cloruros-IA-2005.pdf">https://fertilizar.org.ar/wp-content/uploads/2005/06/Salvagiotti-Trigo-Cloruros-IA-2005.pdf</uri> (Accessed <access-date>December 25, 2023</access-date>).</citation>
</ref>
<ref id="B28">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Singh</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Arora</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>CNN-based detection of healthy and unhealthy wheat crops</article-title>,&#x201d; in <conf-name>2020 International Conference on Smart Electronics and Communication (ICOSEC)</conf-name>, <conf-loc>Trichy, India</conf-loc>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICOSEC49089.2020.9215340</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Singh</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ganapathysubramanian</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>A. K.</given-names>
</name>
<name>
<surname>Sarkar</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Machine learning for high-throughput stress phenotyping in plants</article-title>. <source>Trends Plant Sci.</source> <volume>21</volume>, <fpage>110</fpage>&#x2013;<lpage>124</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.tplants.2015.10.015</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Min</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A CNN-LSVM model for imbalanced images identification of wheat leaf</article-title>. <source>Neural Network World</source> <volume>29</volume> (<issue>5</issue>), <fpage>345</fpage>&#x2013;<lpage>361</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.14311/NNW.2019.29.021</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Shu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Estimation of peanut southern blight severity in hyperspectral data using the synthetic minority oversampling technique and fractional-order differentiation</article-title>. <source>Agriculture</source> <volume>14</volume> (<issue>3</issue>), <fpage>476</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture14030476</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Terentev</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Dolzhenko</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Fedotov</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Eremenko</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Current state of hyperspectral remote sensing for early plant disease detection: A review</article-title>. <source>Sensors</source> <volume>22</volume> (<issue>3</issue>), <elocation-id>757</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s22030757</pub-id>.</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>U&#x11f;uz</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Uysal</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Classification of olive leaf diseases using deep convolutional neural networks</article-title>. <source>Neural Computing Appl.</source> <volume>33</volume>, <fpage>4133</fpage>&#x2013;<lpage>4149</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00521-020-05235-5</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wan</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Hyperspectral sensing of plant diseases: principle and methods</article-title>. <source>Agronomy</source> <volume>12</volume>, <elocation-id>1451</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy12061451</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weiss</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Fr&#xe9;d&#xe9;ric</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gr&#xe9;gory</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Remote sensing for agricultural applications: A meta-review</article-title>. <source>Remote Sens. Environ.</source> <volume>236</volume>, <elocation-id>111402</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rse.2019.111402</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yadav</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Kumar Rana</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Nagpal</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Plant leaf disease detection and classification using particle swarm optimization</article-title>. <conf-name>Lect. Notes Comput. Sci. (including Subser. Lect. Notes Artif. Intell. Lect. Notes Bioinformatics) 11407 LNCS</conf-name>, <fpage>294</fpage>&#x2013;<lpage>306</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-030-19945-6_21</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Remote sensing and precision agriculture technologies for crop disease detection and management with a practical application example</article-title>. <source>Engineering</source> <volume>6</volume> (<issue>5</issue>), <fpage>528</fpage>&#x2013;<lpage>532</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.eng.2019.10.015</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Pu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Gonz&#xe1;lez-Moreno</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Monitoring plant diseases and pests through remote sensing sensor technology: A review</article-title>. <source>Comput. Electron. Agric.</source> <volume>165</volume>, <fpage>104943</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2019.104943</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>
