<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2023.1200901</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Research on machine vision and deep learning based recognition of cotton seedling aphid infestation level</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Xu</surname>
<given-names>Xin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/840781"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shi</surname>
<given-names>Jing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Yongqin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>He</surname>
<given-names>Qiang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Liangliang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1489998"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sun</surname>
<given-names>Tong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ding</surname>
<given-names>Ruifeng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lu</surname>
<given-names>Yanhui</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2267653"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Xue</surname>
<given-names>Chaoqun</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Qiao</surname>
<given-names>Hongbo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2272021"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Information and Management Science, Henan Agricultural University</institution>, <addr-line>Zhengzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Institute of Plant Protection, Xinjiang Academy of Agricultural Sciences</institution>, <addr-line>Urumqi</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Institute of Plant Protection, Chinese Academy of Agricultural Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Zhengzhou Tobacco Research Institute of China National Tobacco Corporation (CNTC)</institution>, <addr-line>Zhengzhou</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Xiaoyulong Chen, Guizhou University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Lin Jiao, Anhui University, China; Guanqiu Qi, Buffalo State College, United States</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Chaoqun Xue, <email xlink:href="mailto:xuecq@ztri.com.cn">xuecq@ztri.com.cn</email>; Hongbo Qiao, <email xlink:href="mailto:qiaohb@henu.edu.cn">qiaohb@henu.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>08</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1200901</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>04</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>10</day>
<month>07</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Xu, Shi, Chen, He, Liu, Sun, Ding, Lu, Xue and Qiao</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Xu, Shi, Chen, He, Liu, Sun, Ding, Lu, Xue and Qiao</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p><italic>Aphis gossypii</italic> Glover is a major insect pest in cotton production, which can cause yield reduction in severe cases. In this paper, we proposed the <italic>A. gossypii</italic> infestation monitoring method, which identifies the infestation level of <italic>A. gossypii</italic> at the cotton seedling stage, and can improve the efficiency of early warning and forecasting of <italic>A. gossypii</italic>, and achieve precise prevention and cure according to the predicted infestation level. We used smartphones to collect <italic>A</italic>. <italic>gossypii</italic> infestation images and compiled an infestation image data set. And then constructed, trained, and tested three different <italic>A. gossypii</italic> infestation recognition models based on Faster Region-based Convolutional Neural Network (R-CNN), You Only Look Once (YOLO)v5 and single-shot detector (SSD) models. The results showed that the YOLOv5 model had the highest mean average precision (mAP) value (95.7%) and frames per second (FPS) value (61.73) for the same conditions. In studying the influence of different image resolutions on the performance of the YOLOv5 model, we found that YOLOv5s performed better than YOLOv5x in terms of overall performance, with the best performance at an image resolution of 640&#xd7;640 (mAP of 96.8%, FPS of 71.43). And the comparison with the latest YOLOv8s showed that the YOLOv5s performed better than the YOLOv8s. Finally, the trained model was deployed to the Android mobile, and the results showed that mobile-side detection was the best when the image resolution was 256&#xd7;256, with an accuracy of 81.0% and FPS of 6.98. The real-time recognition system established in this study can provide technical support for infestation forecasting and precise prevention of <italic>A. gossypii</italic>.</p>
</abstract>
<kwd-group>
<kwd>
<italic>Aphis gossypii</italic> Glover</kwd>
<kwd>Faster R-CNN</kwd>
<kwd>YOLOv5</kwd>
<kwd>SSD</kwd>
<kwd>deep learning</kwd>
</kwd-group>
<counts>
<fig-count count="10"/>
<table-count count="7"/>
<equation-count count="6"/>
<ref-count count="44"/>
<page-count count="14"/>
<word-count count="7399"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Technical Advances in Plant Science</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction </title>
<p>Cotton is an important cash crop in China, with Xinjiang serving as the main production location. In 2022, the cotton planting area in Xinjiang was 2.4969 million hectares, accounting for 83.22% of China; the yield per unit area was 2158.9 kilograms per hectare; the total production was 5.391 million tons, accounting for 90.20% of China&#x2019;s total production (<xref ref-type="bibr" rid="B1">National Bureau of Statistics, 2022</xref>). <italic>Aphis gossypii</italic> Glover is one of the most serious insect pests in cotton cultivation (<xref ref-type="bibr" rid="B2">Luo et&#xa0;al., 2017</xref>), and can cause leaf curling and wilting after sucking nutrients from cotton leaves, which in turn affects the growth and development of cotton plants, leading to a decrease in yield and quality. The occurrence of <italic>A. gossypii</italic> leads to a cotton yield reduction of approximately 15% to 30% and can cause total crop failure in severe cases (<xref ref-type="bibr" rid="B3">Herron et&#xa0;al., 2000</xref>; <xref ref-type="bibr" rid="B4">Fan et&#xa0;al., 2013</xref>). Therefore, it is important to study the <italic>A. gossypii</italic> occurrence patterns and infestation features and to explore a fast and efficient method for monitoring and detecting this pest. In this way, the efficiency of early warning and forecasting can be improved, and cotton yield reduction can be mitigated.</p>
<p>The current method of <italic>A. gossypii</italic> infestation detection is still mainly based on manual investigation, mainly through manual field inspection, measurement, statistics and expert identification (<xref ref-type="bibr" rid="B5">Liu et&#xa0;al., 2019</xref>), which is not only time-consuming and laborious but also subjective, with a high rate of misjudgment and poor real-time response. Thus, there is an urgent need for a time- and labor-saving technology for <italic>A. gossypii</italic> detection to improve the efficiency of infestation level recognition.</p>
<p>Image processing technology, in which image signals are converted into the corresponding digital signals and are processed by using computers, provides new technological solutions for the detection of crop pests and diseases (<xref ref-type="bibr" rid="B6">Arnal, 2013</xref>). Crop pest and disease image recognition technology has the characteristics of rapidity, accuracy and real-time employability. Research on this technology has mainly focused on three aspects: crop pest and disease image segmentation, feature extraction and classification recognition. <xref ref-type="bibr" rid="B7">Zhang et&#xa0;al. (2018)</xref> proposed an automatic image segmentation model for diseased leaves with active gradients and local information in which image details such as cotton leaves with a background of uneven illumination, shadows and weeds could be segmented to better achieve the ideal extraction of leaf edges. <xref ref-type="bibr" rid="B8">Lu and Ye (2020)</xref> proposed a semiautomatic locust species and age information detection model based on locust image segmentation, feature variable extraction and support vector machine classification, with 96.16% detection accuracy. <xref ref-type="bibr" rid="B9">Nabilah et&#xa0;al. (2020)</xref> used six traditional feature methods and six deep learning feature methods to extract significant pest features from chili leaf images, and the extracted features were fed into a support vector machine (SVM), random forest, and an artificial neural network for the recognition task. The results showed that the deep learning feature-based methods outperformed the traditional feature-based methods, and the best accuracy of 92.10% was obtained using the SVM classifier. <xref ref-type="bibr" rid="B10">Khan et&#xa0;al. (2020)</xref> designed a cucumber leaf disease detection and classification system and achieved 98.08% classification accuracy for five cucumber leaf diseases using a multi-class support vector machine (M-SVM) approach. 
<xref ref-type="bibr" rid="B11">Wang et&#xa0;al. (2019)</xref> proposed a pest detection and identification diagnosis system based on transfer learning, which was able to train and test 10 types of pests with an accuracy of 93.84%. <xref ref-type="bibr" rid="B12">Wang et&#xa0;al. (2020)</xref> proposed a convolutional neural network recognition model based on the Inception module and dilated convolution. By setting different parameters, six improved models were obtained, which were trained to recognize 26 diseases in 14 different crops. The final experiment could achieve an average recognition accuracy of 99.37%. <xref ref-type="bibr" rid="B13">Hu et&#xa0;al. (2020)</xref> proposed a convolutional neural network based on data augmentation combined with transfer learning to recognize corn leaf diseases, and achieved an average recognition accuracy of 97.6% for Corn Gray leaf spot, Corn Common rust, Corn Northern Leaf Blight, and healthy leaves, with an accuracy of more than 95% for each category. To accurately identify small agricultural pests, <xref ref-type="bibr" rid="B14">Dong et&#xa0;al. (2021)</xref> proposed a CRA-Net which included a channel recalibration feature pyramid network and adaptive anchor module. The results showed that the method achieved an average precision of 67.9%, which was superior to other state-of-the-art methods. <xref ref-type="bibr" rid="B15">Gu et&#xa0;al. (2021)</xref> proposed a method for diagnosing plant diseases and identifying pests with deep features based on transfer learning, and the proposed model achieved 96.02% and 99.61% accuracy, respectively. To solve the problem of small pest identification and detection, <xref ref-type="bibr" rid="B16">Wang R. J. et&#xa0;al. (2021)</xref> proposed a sampling-balanced region proposal generation network, and designed a novel adaptive region of interest selection method to learn features at different levels of the feature pyramid. 
Several experiments on the proposed AgriPest21 data set showed that the method could achieve an average recall rate of 89.0% and mAP of 78.7%, superior to other state-of-the-art methods. <xref ref-type="bibr" rid="B17">Wei et&#xa0;al. (2022)</xref> proposed a multiscale feature fusion-based crop pest and disease classification method and achieved good classification results on 12 pest data sets, with a correct classification rate of 98.2%. <xref ref-type="bibr" rid="B18">Jiao et&#xa0;al. (2022)</xref> developed a CNN-based method for the detection of multi-class pests in complex scenarios, and conducted a large number of comparative experiments on the AgriPest21 data set. The results showed that the method could achieve 77.0% accuracy, which was significantly better than other most advanced methods. <xref ref-type="bibr" rid="B19">Mallick et&#xa0;al. (2023)</xref> proposed an innovative deep learning-based approach for automatic multi-class mung bean pests and diseases detection and classification, and for each class, the proposed model had an overall pests and diseases detection accuracy of 93.65%. Although the above studies achieved good results, image segmentation and feature extraction in complex and variable background environments were still difficult for their models, the number of experimental samples was limited, and the crop pest and disease recognition models they established were unstable, which indicates that there is still a large gap between research and practical application.</p>
<p>With the development of artificial intelligence technology, researchers have started to detect and study crop pests and diseases with the help of deep learning. Deep learning can automatically, efficiently and accurately extract object features from a large number of crop pest and disease images, thus making up for the shortcomings of traditional manual recognition and enabling crop pest and disease image recognition. Deep learning-based image object detection techniques have enabled great advances, and at present, two main detection methods have been developed. One category is object detection based on deep convolutional networks with region proposal, and the representative models are Fast Region-based Convolutional Neural Network (R-CNN) (<xref ref-type="bibr" rid="B20">Ross, 2015</xref>), Faster R-CNN (<xref ref-type="bibr" rid="B21">Ren et&#xa0;al., 2015</xref>), and Mask R-CNN (<xref ref-type="bibr" rid="B22">He et&#xa0;al., 2017</xref>). Among these models, Faster R-CNN is unique in abandoning the traditional sliding window and selective search methods and instead generates detection boxes directly using a region proposal network (RPN), which greatly improves the detection box generation speed. Another category is object detection based on deep convolutional networks with regression computation, and the representative models include You Only Look Once (YOLO) (<xref ref-type="bibr" rid="B23">Redmon et&#xa0;al., 2016</xref>), and single-shot detector (SSD) (<xref ref-type="bibr" rid="B24">Liu et&#xa0;al., 2016</xref>). Of the different models in the YOLO series, YOLOv5 uses the PyTorch framework and is user-friendly, not only making it easy to configure the environment but also enabling very fast training of the model. Moreover, it has very good performance in detecting smaller objects. 
SSD integrates the YOLO concept of fast detection, offers the advantages of RPN that are found in Faster R-CNN, and improves the handling of multi-size objects, which is to say it does not rely solely on the top-level feature map for prediction.</p>
<p>Currently, the research on agricultural object detection for both fruit detection and pest and disease recognition is increasingly turning to deep learning. <xref ref-type="bibr" rid="B25">Shen et&#xa0;al. (2018)</xref> used Faster R-CNN to extract regions in images that might contain insects and to classify the insects in these regions, and its mean average precision reached 88%. <xref ref-type="bibr" rid="B26">Liu and Wang (2020)</xref> proposed a method for early recognition of tomato leaf spot based on the MobileNetv2-YOLOv3 model, and the results showed that in all test sets, the F1 scores and average precision (AP) values were 93.24% and 91.32%, respectively, and the average IOU value was 86.98%. <xref ref-type="bibr" rid="B27">Chu et&#xa0;al. (2021)</xref> developed a novel suppressed Mask R-CNN for apple detection, and the network they developed had an F1 value of 0.905 and a detection time of 0.25 seconds per frame on a standard desktop computer, which were better than the values for state-of-the-art models. <xref ref-type="bibr" rid="B28">Wang X. W. et&#xa0;al. (2021)</xref> proposed an improved object detection algorithm based on YOLOv3 to address the problem of the complex background in early stage images of tomato pests and diseases in natural environments; this model enhanced the recognition of pests and diseases, with an average recognition accuracy of 91.81%. <xref ref-type="bibr" rid="B29">Li et&#xa0;al. (2021)</xref> proposed a detection method named Lemon-YOLO (L-YOLO) to improve the accuracy and real-time detection of lemons in natural environments. The experimental results show that the AP value and FPS value of the proposed L-YOLO on the lemon test set are 96.28% and 106, respectively, 5.68% and 28 higher than those of YOLOv3. <xref ref-type="bibr" rid="B30">Zhang et&#xa0;al. 
(2021)</xref> first developed a synthetic soybean leaf disease image data set, and then designed a multi-feature fusion Faster R-CNN (MF<sup>3</sup> R-CNN) to detect soybean leaf disease in complex scenes, obtaining the best average precision of 83.34% in the actual test data set. <xref ref-type="bibr" rid="B31">Sun et&#xa0;al. (2021)</xref> proposed a mobile-based detection model, Mobile End AppleNet (MEAN)-SSD, for the real-time detection of apple leaf diseases on mobile devices that can automatically extract apple leaf spot features and detect five common apple leaf spots. <xref ref-type="bibr" rid="B32">Qi et&#xa0;al. (2022)</xref> proposed a squeeze-and-excitation (SE)-YOLOv5-based object detection model to recognize tomato virus disease. The trained network model was evaluated on a test set, and its mean average precision reached 94.10%. <xref ref-type="bibr" rid="B33">Zhao et&#xa0;al. (2022)</xref> proposed a new Faster R-CNN architecture and constructed a strawberry leaf, flower and fruit data set. The results showed that the model was able to effectively detect healthy strawberries and seven strawberry diseases under natural conditions with a mAP of 92.18% and an average detection time of only 229 ms. <xref ref-type="bibr" rid="B34">Liu et&#xa0;al. (2022)</xref> proposed a tomato pest identification algorithm based on an improved YOLOv4 fusion triple attention mechanism, and the proposed algorithm was tested on the established data set with an average recognition accuracy of 95.2%. <xref ref-type="bibr" rid="B35">Ahmad et&#xa0;al. (2022)</xref> implemented an automated system in the form of a smartphone IP camera for pest detection from digital images/video based on eight YOLO object detection architectures, and the results showed that the YOLOv5x architecture achieved the highest mAP (98.3%) at real-time inference speed and could correctly recognize 23 pests in 40.5 ms. 
The models presented in these studies can achieve fruit detection as well as accurate classification and recognition of pests and diseases; however, most of the existing studies on models for the recognition of crop pests and diseases focus on recognition of the pests themselves, but <italic>A. gossypii</italic>, due to its small size, large quantities and dense accumulation on the undersides of leaves, is a pest that is difficult to identify directly.</p>
<p>Therefore, utilizing the different infestation symptoms cotton leaves exhibit when infested by <italic>A. gossypii</italic> and determining the severity of <italic>A. gossypii</italic> occurrence through the features of leaf infestation is an alternative approach. In this study, the level of <italic>A. gossypii</italic> infestation was determined by creating a model that can assess the symptoms in cotton leaves caused by <italic>A. gossypii</italic> infestation. Images of <italic>A. gossypii</italic> infestation in the field environment were quickly acquired using smartphones, and then the data were annotated to construct four types of data sets: level 0, level 1, level 2 and level 3. On this basis, three different <italic>A. gossypii</italic> infestation recognition models based on Faster R-CNN, YOLOv5 and SSD were constructed, and the test results of the three models were compared and analyzed to select the optimal <italic>A. gossypii</italic> infestation recognition model to deploy it to the Android mobile side, which provides a fast, convenient and low-cost method for <italic>A. gossypii</italic> infestation monitoring. The infestation class recognition model of <italic>A. gossypii</italic> established in this study can provide technical support for prediction forecast and precise prevention and cure of <italic>A. gossypii</italic>, which will enhance the utilization rate of pesticides in the field, reduce the cost of agricultural production and enhance the yield and quality of cotton. Afterwards, it will continue to be deployed to the spraying machinery, striving to achieve simultaneous identification and precise prevention and cure as soon as possible.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Experimental design</title>
<p>The experiment was conducted in 2018, 2019, and 2022 at the Korla Experimental Station of the Institute of Plant Protection, Chinese Academy of Agricultural Sciences (41&#xb0;44&#x2032;59&#x2033;N, 85&#xb0;48&#x2032;30&#x2033;E). The experimental station is located in Heshilike Township, Korla City, Bayingol Mongolian Autonomous Prefecture, Xinjiang, China, which is located in the central part of Xinjiang and on the northeastern edge of the Tarim Basin, near the Tianshan Branch to the north and the Taklimakan Desert to the south. Cotton is the main crop grown in this area, and is cultivated with large-scale and simple cropping structures. <italic>A. gossypii</italic> is the main cotton pest in the region, and its peak season occurs from late June to early July (<xref ref-type="bibr" rid="B36">Lu et&#xa0;al., 2022</xref>). Experimental plots with severe occurrence of <italic>A. gossypii</italic> were selected in the field for data acquisition. No pesticides were applied to suppress the population growth of this pest during the experiment. The cotton crops selected for the test were the experimental cultivars &#x2018;Zhongmiansuo49&#x2019; and &#x2018;Xinluzhong66&#x2019; from the Cotton Insect Group of the Institute of Plant Protection, Chinese Academy of Agricultural Sciences. The cotton was sown in mid to late April with a film mulching cultivation mode, and with spot sowing on the film. Standard water and fertilizer management was carried out through drip irrigation under the film.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data acquisition</title>
<p>Cotton image data were collected at the Korla Experimental Station of the Institute of Plant Protection, Chinese Academy of Agricultural Sciences in 2018, 2019 and 2022 (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>). The collection dates for 2018 and 2019 were from late June to mid-July, and the collection dates for 2022 were from early June to early July. The collections were made on sunny days and with low light intensity to avoid image overexposure. To allow the model to learn more features of <italic>A. gossypii</italic> infestation during training, multiple smartphones were used to acquire the cotton images. Image data acquisition was conducted in 2018 and 2019 with the HUAWEI Nova, OnePlus7pro, iPhone 8 Plus, and Mi Note 3 smartphones and in 2022 with the iPhone 8 Plus, iPhone 12, iPhone 13, iPhone XR, and Redmi 5 Plus smartphones. The image acquisition method was overhead vertical shooting. The researchers stood next to the cotton plants with mobile equipment in hand and vertically shot images of the cotton seedlings from a vantage of 1.2-1.5 meters. The data acquired in 2018 and 2019 were used for training, validation, and testing of the <italic>A. gossypii</italic> infestation recognition model, and the cotton images collected all featured the &#x2018;Zhongmiansuo49&#x2019; cultivar. In 2022, in addition to photographing plants of the &#x2018;Zhongmiansuo49&#x2019; cultivar, some images of plants from the &#x2018;Xinluzhong66&#x2019; cultivar were also collected for testing the <italic>A. gossypii</italic> infestation level recognition model detection capabilities on images of other cultivars.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Summary of data acquisition characteristics.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Cotton cultivars</th>
<th valign="middle" align="center">Device</th>
<th valign="middle" align="center">Rear camera pixels (million)</th>
<th valign="middle" align="center">Image resolution</th>
<th valign="middle" align="center">Aperture Value</th>
<th valign="middle" align="center">Focal length (mm)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="left">&#x2018;Zhongmiansuo49&#x2019;</td>
<td valign="middle" align="center">HUAWEI Nova</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">4032 &#xd7; 3016<break/>3016 &#xd7; 3016</td>
<td valign="middle" align="center">f/2.2</td>
<td valign="middle" align="center">4</td>
</tr>
<tr>
<td valign="middle" align="center">OnePlus7pro</td>
<td valign="middle" align="center">48</td>
<td valign="middle" align="center">4000 &#xd7; 3000<break/>4608 &#xd7; 3456</td>
<td valign="middle" align="center">f/1.6<break/>f/2.2</td>
<td valign="middle" align="center">5<break/>2</td>
</tr>
<tr>
<td valign="middle" align="center">iPhone 8 Plus</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">3024 &#xd7; 4032</td>
<td valign="middle" align="center">f/1.8</td>
<td valign="middle" align="center">4</td>
</tr>
<tr>
<td valign="middle" align="center">Mi Note 3</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">3016 &#xd7; 4032</td>
<td valign="middle" align="center">f/1.8</td>
<td valign="middle" align="center">4</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="left">&#x2018;Zhongmiansuo49&#x2019;</td>
<td valign="middle" align="center">iPhone 12</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">3024 &#xd7; 4032</td>
<td valign="middle" align="center">f/1.6</td>
<td valign="middle" align="center">4</td>
</tr>
<tr>
<td valign="middle" align="center">iPhone 13</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">3024 &#xd7; 4032</td>
<td valign="middle" align="center">f/1.6</td>
<td valign="middle" align="center">5</td>
</tr>
<tr>
<td valign="middle" align="center">iPhone XR</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">3024 &#xd7; 4032</td>
<td valign="middle" align="center">f/1.8</td>
<td valign="middle" align="center">4</td>
</tr>
<tr>
<td valign="middle" align="center">iPhone 8 Plus</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">3024 &#xd7; 4032</td>
<td valign="middle" align="center">f/1.8</td>
<td valign="middle" align="center">4</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="left">&#x2018;Xinluzhong66&#x2019;</td>
<td valign="middle" align="center">iPhone XR</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">3024 &#xd7; 4032</td>
<td valign="middle" align="center">f/1.8</td>
<td valign="middle" align="center">4</td>
</tr>
<tr>
<td valign="middle" align="center">Redmi 5 Plus</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">3000 &#xd7; 4000</td>
<td valign="middle" align="center">f/2.2</td>
<td valign="middle" align="center">4</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Image processing</title>
<p>Directly inputting original images into a model for training can interfere with training by causing problems such as taking up a large amount of memory on the device, slowing down the training speed of the model, and causing memory overflow. Therefore, the original images must be preprocessed. When inputting images into the model, the image formats are adjusted to a certain size. To prevent soil and crop backgrounds from interfering with object detection, the original image is cropped to remove redundant information such as soil from the image. To crop the images, we first started from the center of each original cotton image, cropping the image to 3000&#xd7;3000 and uniformly adjusting the image resolution to 1024&#xd7;1024. Then the data were annotated. The data preprocessing process of this paper is shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Data preprocessing flow chart.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g001.tif"/>
</fig>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Data set construction</title>
<p>Because the training data of the object detection model need to be manually labeled, LabelImg (<xref ref-type="bibr" rid="B37">Tzutalin, 2015</xref>) was chosen as the image labeling tool in this experiment. Examples of acquired images for this experiment are shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>. When labeling the image data, the <italic>A. gossypii</italic> infestation grading standard referred to the national grading standard (<xref ref-type="bibr" rid="B38">GB/T 15799-2011, 2011</xref>) (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). Since the level of infestation suffered by cotton leaves at the seedling stage rarely reaches level 4, individual level 4s were classified as level 3s when conducting data annotation to avoid serious imbalance in the data set. The annotation entailed using rectangular boxes to annotate the cotton images, and according to the grading standard in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, the leaves in the central region of the cotton plant were annotated as level 0, level 1, level 2 and level 3 according to their different morphological characteristics, and the generated annotation files were all saved as XML files in PASCAL VOC format. The annotated images included the images collected by the various collection devices in 2018 and 2019, and a total of 3051 &#x2018;Zhongmiansuo49&#x2019; cotton images were annotated, including 295 from the HUAWEI Nova, 1237 from the OnePlus7pro, 1270 from the iPhone 8 Plus and 249 from the Mi Note 3.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Example of acquired images: <bold>(A)</bold> level 0, <bold>(B)</bold> level 1, <bold>(C)</bold> level 2, <bold>(D)</bold> level 3, <bold>(E)</bold> level 4.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g002.tif"/>
</fig>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>
<italic>Aphis gossypii</italic> Glover infestation grading standards.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Infestation level</th>
<th valign="middle" align="center">Infestation description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">0</td>
<td valign="middle" align="left">No aphids, spreading leaf blades.</td>
</tr>
<tr>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left">There are aphids, but the leaves are not damaged.</td>
</tr>
<tr>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">There are aphids, and the most severely damaged leaves are crinkled or slightly rolled, nearly semicircular.</td>
</tr>
<tr>
<td valign="middle" align="left">3</td>
<td valign="middle" align="left">There are aphids, and the most heavily damaged leaves are curled up in a semicircle or more than semicircle, and are arc-shaped.</td>
</tr>
<tr>
<td valign="middle" align="left">4</td>
<td valign="middle" align="left">There are aphids, and the most heavily damaged leaves are completely curled and appear spherical.</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To balance the number of labels, we also performed a left-right flip operation on 188 annotated images from the iPhone 8 Plus and 151 annotated images from the Mi Note 3, resulting in 3390 annotated images. After data enhancement by flipping up and down, adding Gaussian noise and changing image brightness, the number of images was 16,950, and 16,950 annotated images were used as the data set for this experiment. The data set was first divided into a training validation set and a test set at a ratio of 9:1, and then the training validation set was divided into a training set and a validation set at a ratio of 9:1. The final training set was 13,729, the validation set was 1,526, the test set was 1,695, and the number of labels for each infestation level was 10,554 for level 0, 10,854 for level 1, 11,066 for level 2, and 11,025 for level 3.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Model construction</title>
<p>In this study, three classical object detection models, Faster R-CNN, YOLOv5 and SSD, were chosen to conduct the study of <italic>A. gossypii</italic> infestation level recognition.</p>
<p>The structure of the Faster R-CNN (<xref ref-type="bibr" rid="B21">Ren et&#xa0;al., 2015</xref>) model is shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>. The model first used the feature extraction network to extract the feature map of each input cotton image, which was shared by the subsequent RPN with the Fast R-CNN network. RPN performs the binary classification task through a softmax classifier, determines whether the anchor belongs to the foreground or background, and obtains the candidate box position through anchor regression. The Fast R-CNN synthesizes the information from the feature maps and candidate boxes, determines the category to which the foreground belongs, and generates the exact location of the final detection box.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Faster R-CNN model structure.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g003.tif"/>
</fig>
<p>The structure of the YOLOv5 (<xref ref-type="bibr" rid="B39">Jocher, 2020</xref>) model is shown in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>. The network structure of the model is divided into four parts according to the processing stage: input, backbone, neck and prediction. The input part completes basic processing tasks such as data enhancement, adaptive image scaling and anchor box calculation. The backbone network mainly uses a cross stage partial (CSP) structure to extract the main information from the input samples for use in subsequent stages. The neck part uses feature pyramid network (FPN) and path aggregation network (PAN) structures and uses the information extracted from the backbone to enhance feature fusion. The prediction component makes predictions and calculates the value of each loss. YOLOv5 has four model styles, s, m, l and x. They have the same network structure, and only the depth and width of the models are different.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>YOLOv5 model structure.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g004.tif"/>
</fig>
<p>The structure of the SSD (<xref ref-type="bibr" rid="B24">Liu et&#xa0;al., 2016</xref>) model is shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>. The model inputted cotton images into the backbone network, which could obtain feature maps of different sizes from the pretrained base network, and feature maps of six convolutional layers of different sizes, Conv4_3, Conv7, Conv8_2, Conv9_2, Conv10_2, and Conv11_2, were the output. Six default candidate boxes with different aspect ratios were constructed from each pixel point of these feature maps and then detected and classified separately to generate multiple initial eligible default candidate boxes. Finally, the nonmaximum suppression method was used to screen out the eligible candidate boxes to generate the final set of detected boxes, that is, the <italic>A. gossypii</italic> infestation level.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>SSD model structure.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g005.tif"/>
</fig>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Experimental environment</title>
<p>On the basis of the construction model, the hardware environment was as follows: graphics processor, NVIDIA A100-PCIE-40GB; CUDA Cores, 6912; total memory, 40960 MB; and memory interface, 5120-bit. The software environment included PyCharm (2020.3, JetBrains, Prague, Czech Republic), Linux (Ubuntu 20.04.4 LTS, Linus Benedict Torvalds, Helsinki, Finland), Python (Python 3.8.12, Python Software Foundation, State of Delaware, USA), PyTorch (PyTorch 1.10.2 or PyTorch 1.10.0+CUDA 11.3, Facebook AI Research, California, USA), and Android Studio (2021.2.1, Google, California, USA).</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Evaluation metrics</title>
<p>To quantitatively analyze the performance of a detection algorithm, researchers have formulated many evaluation metrics, each reflecting different aspects of the performance to some extent. The object detection performance evaluation metrics used in this experiment were Precision (<xref ref-type="bibr" rid="B40">Yang, 1999</xref>), Recall (<xref ref-type="bibr" rid="B40">Yang, 1999</xref>), Accuracy, Average Precision (AP), mean Average Precision (mAP), and Frames Per Second (FPS).</p>
<p>For each category, a curve can be drawn according to the precision and recall rate, and the AP value is the area under the curve. The mAP value is the average value of AP for each category. The classification and localization ability of the object detection model is its main performance representation, and the mAP value is its most intuitive expression. The larger the mAP value is, the higher the precision of the model; the detection speed represents the computational performance of the object detection model and is represented by the FPS value, and the larger the FPS value is, the better the detection speed of the algorithm model (<xref ref-type="bibr" rid="B41">Xu et&#xa0;al., 2021</xref>).</p>
<p>The formula for calculating each evaluation metric is shown below:</p>
<disp-formula>
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mstyle>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>R</mml:mi>
</mml:mstyle>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mstyle>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>A</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>c</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>c</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>u</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>r</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>a</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>c</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>y</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>=</mml:mo>
</mml:mstyle>
<mml:mfrac>
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>T</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>+</mml:mo>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>T</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>N</mml:mi>
</mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>T</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>+</mml:mo>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>F</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>+</mml:mo>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>F</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>N</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>+</mml:mo>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>T</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>N</mml:mi>
</mml:mstyle>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>A</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>=</mml:mo>
</mml:mstyle>
<mml:munderover>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>&#x222b;</mml:mo>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mn>0</mml:mn>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mn>1</mml:mn>
</mml:mstyle>
</mml:munderover>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo stretchy="false">(</mml:mo>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>R</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo stretchy="false">)</mml:mo>
</mml:mstyle>
</mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>d</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>R</mml:mi>
</mml:mstyle>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>m</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>A</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>=</mml:mo>
</mml:mstyle>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mn>1</mml:mn>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>n</mml:mi>
</mml:mstyle>
</mml:msubsup>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>A</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>P</mml:mi>
</mml:mstyle>
</mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>n</mml:mi>
</mml:mstyle>
</mml:mfrac>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>F</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>P</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>S</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mo>=</mml:mo>
</mml:mstyle>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>f</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>r</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>a</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>m</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>N</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>u</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>m</mml:mi>
</mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>l</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>a</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>p</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>s</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>d</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>T</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>i</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>m</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold-italic" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where P is precision, R is recall, TP is positive samples correctly predicted as positive samples, FP is negative samples incorrectly predicted as positive samples, FN is positive samples incorrectly predicted as negative samples, TN is negative samples correctly predicted as negative samples, n is the number of categories, frameNum is number of images, and elapsedTime is detection time.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Selecting the best model</title>
<p>In this experiment, three models, Faster R-CNN, YOLOv5 and SSD, were selected, and the same data set was used with each model. Since the width and depth of the YOLOv5x model were the largest among the four model styles, the x model was chosen here to participate in the comparison test. Before training, the image resolution was uniformly set to 512&#xd7;512, the learning rate was set to 0.0005, the iteration rounds were set to 300 rounds, and the batch size of each iteration was set to 16. The training model was saved, and then the model was tested and evaluated with the test set. The AP, mAP, and FPS obtained for the three models&#x2019; tests are shown in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Precision evaluation of each model for detection of <italic>Aphis gossypii</italic> Glover infestation level.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="left">Model</th>
<th valign="middle" colspan="4" align="center">AP (%)</th>
<th valign="middle" rowspan="2" align="center">mAP (%)</th>
<th valign="middle" rowspan="2" align="center">FPS</th>
</tr>
<tr>
<th valign="middle" align="center">0</th>
<th valign="middle" align="center">1</th>
<th valign="middle" align="center">2</th>
<th valign="middle" align="center">3</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Faster R-CNN</td>
<td valign="middle" align="center">88.1</td>
<td valign="middle" align="center">86.2</td>
<td valign="middle" align="center">86.8</td>
<td valign="middle" align="center">88.6</td>
<td valign="middle" align="center">87.4</td>
<td valign="top" align="center">10.44</td>
</tr>
<tr>
<td valign="middle" align="left">YOLOv5x</td>
<td valign="middle" align="center">94.6</td>
<td valign="middle" align="center">94.4</td>
<td valign="middle" align="center">96.7</td>
<td valign="middle" align="center">97.3</td>
<td valign="middle" align="center">95.7</td>
<td valign="top" align="center">61.73</td>
</tr>
<tr>
<td valign="middle" align="left">SSD</td>
<td valign="middle" align="center">63.2</td>
<td valign="middle" align="center">59.2</td>
<td valign="middle" align="center">50.8</td>
<td valign="middle" align="center">73</td>
<td valign="middle" align="center">61.5</td>
<td valign="top" align="center">7.64</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, it can be seen that the mAP value after testing was 87.4% for the Faster R-CNN model, 95.7% for the YOLOv5 model, and 61.5% for the SSD model. Through the test results, it was revealed that the mAP value of the YOLOv5 model for recognizing <italic>A. gossypii</italic> infestation levels was higher than those of the Faster R-CNN and SSD models, which indicates that this model has the highest precision. The YOLOv5 model took the least amount of time to train, at 15.62 hours, under the same conditions that were used with all three models, while the Faster R-CNN and SSD models took 34.32 hours and 33.24 hours to train, respectively. It can also be seen from <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref> that the YOLOv5 model had the fastest detection speed, with an FPS value of 61.73, which was much faster than the other two models, indicating that this model has the best detection speed.</p>
<p>In summary, the test results showed that the YOLOv5 model requires the shortest training time and has the highest mAP value and the fastest detection speed, with a mAP value of 95.7% and an FPS value of 61.73. Therefore, this model has the best performance.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Influence of different image resolutions on the performance of the YOLOv5 model</title>
<p>After the comparison of the three models, it was found that the model with the best performance at an image resolution of 512&#xd7;512 was the YOLOv5 model. To verify the effect of different image resolutions on model performance, two model styles, s and x, in the YOLOv5 model were selected to study the performance of the model at image resolutions of 1024&#xd7;1024, 640&#xd7;640, 512&#xd7;512, 256&#xd7;256, and 128&#xd7;128.</p>
<p>The data set used in this experiment was the same as the data set used in the construction of the three models, the learning rate was uniformly set to 0.0005, the number of iterations rounds was set to 10,000, the batch size of each iteration was set to 16, and the image resolution was set appropriately for each according to the requirements of the experiment before training. There is an early stop mechanism in the YOLOv5 model; that is, after a certain quantity of training iteration rounds, if the model effectiveness has not improved, then the model training is stopped early. The patience parameter set in this experiment was 100; that is, during the model training, training was stopped if the model effectiveness did not improve within 100 consecutive rounds. The training models with different image resolutions were saved. The training results for YOLOv5x and YOLOv5s are shown in <xref ref-type="fig" rid="f6">
<bold>Figures&#xa0;6</bold>
</xref>, <xref ref-type="fig" rid="f7">
<bold>7</bold>
</xref>. Then, the training models for each image resolution were evaluated with the test set. The test results obtained are shown in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Training results of the YOLOv5x model with different image resolutions.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g006.tif"/>
</fig>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Training results of the YOLOv5s model with different image resolutions.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g007.tif"/>
</fig>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Comparisons of the test results of the YOLOv5 model with different image resolutions.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="left">Model</th>
<th valign="middle" rowspan="2" align="center">Image size</th>
<th valign="middle" rowspan="2" align="center">Train time (hour)</th>
<th valign="middle" colspan="4" align="center">AP (%)</th>
<th valign="middle" rowspan="2" align="center">mAP (%)</th>
<th valign="middle" rowspan="2" align="center">P (%)</th>
<th valign="middle" rowspan="2" align="center">R (%)</th>
<th valign="middle" rowspan="2" align="center">FPS</th>
</tr>
<tr>
<th valign="middle" align="center">0</th>
<th valign="middle" align="center">1</th>
<th valign="middle" align="center">2</th>
<th valign="middle" align="center">3</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="5" align="left">YOLOv5x</td>
<td valign="middle" align="center">1024&#xd7;1024</td>
<td valign="middle" align="center">141.55</td>
<td valign="middle" align="center">94.0</td>
<td valign="middle" align="center">95.2</td>
<td valign="middle" align="center">96.6</td>
<td valign="middle" align="center">97.7</td>
<td valign="middle" align="center">95.9</td>
<td valign="middle" align="center">96.9</td>
<td valign="middle" align="center">93.5</td>
<td valign="middle" align="center">34.65</td>
</tr>
<tr>
<td valign="middle" align="center">640&#xd7;640</td>
<td valign="middle" align="center">44.66</td>
<td valign="middle" align="center">94.2</td>
<td valign="middle" align="center">94.6</td>
<td valign="middle" align="center">96.6</td>
<td valign="middle" align="center">97.7</td>
<td valign="middle" align="center">95.8</td>
<td valign="middle" align="center">96.1</td>
<td valign="middle" align="center">92.7</td>
<td valign="middle" align="center">51.82</td>
</tr>
<tr>
<td valign="middle" align="center">512&#xd7;512</td>
<td valign="middle" align="center">35.65</td>
<td valign="middle" align="center">93.5</td>
<td valign="middle" align="center">94.4</td>
<td valign="middle" align="center">95.8</td>
<td valign="middle" align="center">96.7</td>
<td valign="middle" align="center">95.1</td>
<td valign="middle" align="center">95.5</td>
<td valign="middle" align="center">92.0</td>
<td valign="middle" align="center">61.79</td>
</tr>
<tr>
<td valign="middle" align="center">256&#xd7;256</td>
<td valign="middle" align="center">33.48</td>
<td valign="middle" align="center">91.4</td>
<td valign="middle" align="center">92.1</td>
<td valign="middle" align="center">95.2</td>
<td valign="middle" align="center">95.9</td>
<td valign="middle" align="center">93.7</td>
<td valign="middle" align="center">95.2</td>
<td valign="middle" align="center">90.4</td>
<td valign="middle" align="center">76.39</td>
</tr>
<tr>
<td valign="middle" align="center">128&#xd7;128</td>
<td valign="middle" align="center">89.77</td>
<td valign="middle" align="center">92.2</td>
<td valign="middle" align="center">93.0</td>
<td valign="middle" align="center">95.3</td>
<td valign="middle" align="center">95.8</td>
<td valign="middle" align="center">94.1</td>
<td valign="middle" align="center">95.6</td>
<td valign="middle" align="center">89.9</td>
<td valign="middle" align="center">81.22</td>
</tr>
<tr>
<td valign="middle" rowspan="5" align="left">YOLOv5s</td>
<td valign="middle" align="center">1024&#xd7;1024</td>
<td valign="middle" align="center">111.40</td>
<td valign="middle" align="center">95.8</td>
<td valign="middle" align="center">94.7</td>
<td valign="middle" align="center">97.7</td>
<td valign="middle" align="center">97.8</td>
<td valign="middle" align="center">96.5</td>
<td valign="middle" align="center">96.9</td>
<td valign="middle" align="center">93.3</td>
<td valign="middle" align="center">62.71</td>
</tr>
<tr>
<td valign="middle" align="center">640&#xd7;640</td>
<td valign="middle" align="center">108.27</td>
<td valign="middle" align="center">96.1</td>
<td valign="middle" align="center">95.3</td>
<td valign="middle" align="center">97.7</td>
<td valign="middle" align="center">98.1</td>
<td valign="middle" align="center">96.8</td>
<td valign="middle" align="center">97.8</td>
<td valign="middle" align="center">93.4</td>
<td valign="middle" align="center">71.43</td>
</tr>
<tr>
<td valign="middle" align="center">512&#xd7;512</td>
<td valign="middle" align="center">52.16</td>
<td valign="middle" align="center">95.1</td>
<td valign="middle" align="center">95.4</td>
<td valign="middle" align="center">97.0</td>
<td valign="middle" align="center">97.9</td>
<td valign="middle" align="center">96.3</td>
<td valign="middle" align="center">95.9</td>
<td valign="middle" align="center">93.1</td>
<td valign="middle" align="center">78.58</td>
</tr>
<tr>
<td valign="middle" align="center">256&#xd7;256</td>
<td valign="middle" align="center">90.91</td>
<td valign="middle" align="center">95.4</td>
<td valign="middle" align="center">95.9</td>
<td valign="middle" align="center">97.7</td>
<td valign="middle" align="center">97.9</td>
<td valign="middle" align="center">96.7</td>
<td valign="middle" align="center">97.1</td>
<td valign="middle" align="center">92.5</td>
<td valign="middle" align="center">82.56</td>
</tr>
<tr>
<td valign="middle" align="center">128&#xd7;128</td>
<td valign="middle" align="center">132.21</td>
<td valign="middle" align="center">87.3</td>
<td valign="middle" align="center">92.7</td>
<td valign="middle" align="center">93.3</td>
<td valign="middle" align="center">93.0</td>
<td valign="middle" align="center">91.6</td>
<td valign="middle" align="center">89.5</td>
<td valign="middle" align="center">85.5</td>
<td valign="middle" align="center">92.32</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>, the convergence speed of the YOLOv5x model was the slowest when the image resolution was 128&#xd7;128, and the convergence speed for several other image resolutions did not differ much. When the image resolution was 1024&#xd7;1024, the mAP value of the validation set reached 96%, which was the highest mAP value among the five image resolutions, indicating that this was the resolution at which the model had the best training effect.</p>
<p>As shown in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>, the convergence speed of the YOLOv5s model was the slowest when the image resolution was 128&#xd7;128, and the convergence speed of the remaining image resolutions was relatively similar. When the image resolution was 640&#xd7;640, the mAP value of the validation set reached 97.1%, which was the highest mAP value among the five image resolutions, indicating that this was the resolution at which the model had the best training effect.</p>
<p>Comparing <xref ref-type="fig" rid="f6">
<bold>Figures&#xa0;6</bold>
</xref>, <xref ref-type="fig" rid="f7">
<bold>7</bold>
</xref>, it can be seen that the convergence speed of the YOLOv5s model was significantly lower than that of the YOLOv5x model. Except at the 128&#xd7;128 image resolution in the YOLOv5x model, the YOLOv5s model generally had more training rounds than the YOLOv5x model for all image resolutions.</p>
<p>As shown in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>, it can be seen that in the YOLOv5x model, when the image resolution was 1024&#xd7;1024, the model took the longest time from training to stopping, at 141.55 hours, and the test set had the highest mAP value, at 95.9% but the lowest FPS value, at 34.65. When the image resolution was 256&#xd7;256, the model took the least amount of time from training to stopping, at 33.48 hours, and the test set had the lowest mAP value, at 93.7%. When the image resolution was 128&#xd7;128, the test set had the highest FPS value, at 81.22. Overall, the performance was best when the image resolution was 640&#xd7;640, as the mAP value for that image resolution was 0.1% lower than the highest mAP value, which was for an image resolution of 1024&#xd7;1024, but the FPS value was 17.17 higher. In the YOLOv5s model, when the image resolution was 640&#xd7;640, the test set had the highest mAP value, at 96.8%. When the image resolution was 128&#xd7;128, the model took the longest time from training to stopping, at 132.21 hours, and the test set had the lowest mAP value, at 91.6%, but the highest FPS value, at 92.32. When the image resolution was 1024&#xd7;1024, the test set had the lowest FPS value, at 62.71. When the image resolution was 512&#xd7;512, the model took the least amount of time from training to stopping, at 52.16 hours. Overall, the performance was best when the image resolution was 640&#xd7;640 because although the FPS value for this image resolution was not the highest, the mAP value was the highest, at 96.8%.</p>
<p>Comparing the YOLOv5x and YOLOv5s models, as seen in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>, the YOLOv5s model performed better than the YOLOv5x model, both in terms of mAP values and FPS values. The model performed best when the image resolution was 640&#xd7;640 with a mAP value of 96.8% and an FPS value of 71.43.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Supplementary tests</title>
<p>The YOLOv5s model showed the best performance in the preceding comparisons, and its authors released the YOLOv8 version in early 2023; therefore, this paper conducts a supplementary experiment comparing the YOLOv5s model used in this paper with the latest YOLOv8s model, and the comparison results are shown in <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Comparison of model test results between YOLOv5s and YOLOv8s.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="left">Model</th>
<th valign="middle" colspan="4" align="center">AP (%)</th>
<th valign="middle" rowspan="2" align="center">P (%)</th>
<th valign="middle" rowspan="2" align="center">R (%)</th>
<th valign="middle" rowspan="2" align="center">mAP (%)</th>
<th valign="middle" rowspan="2" align="center">FPS</th>
</tr>
<tr>
<th valign="middle" align="center">0</th>
<th valign="middle" align="center">1</th>
<th valign="middle" align="center">2</th>
<th valign="middle" align="center">3</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">YOLOv5s</td>
<td valign="middle" align="center">96.1</td>
<td valign="middle" align="center">95.3</td>
<td valign="middle" align="center">97.7</td>
<td valign="middle" align="center">98.1</td>
<td valign="middle" align="center">97.8</td>
<td valign="middle" align="center">93.4</td>
<td valign="middle" align="center">96.8</td>
<td valign="middle" align="center">71.43</td>
</tr>
<tr>
<td valign="middle" align="left">YOLOv8s</td>
<td valign="middle" align="center">96.4</td>
<td valign="middle" align="center">97.1</td>
<td valign="middle" align="center">98.1</td>
<td valign="middle" align="center">98.3</td>
<td valign="middle" align="center">97.6</td>
<td valign="middle" align="center">93.9</td>
<td valign="middle" align="center">97.5</td>
<td valign="middle" align="center">67.75</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>, the YOLOv5s model achieved a mAP value of 96.8% and an FPS value of 71.43 on the test set, while the YOLOv8s model achieved a mAP value of 97.5% and an FPS value of 67.75. Compared to the YOLOv5s model, the mAP value of YOLOv8s improved by 0.7% while the FPS value decreased by 3.68. Although YOLOv8s improved detection precision by 0.7 percentage points over YOLOv5s, its detection speed dropped by 3.68 FPS. All in all, YOLOv5s performed better than YOLOv8s.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>YOLOv5 model based on the Android mobile platform</title>
<p>To facilitate real-time detection in the field, the YOLOv5 model with the best test results was deployed to Android mobile. Based on the test results presented in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>, we chose to deploy the trained YOLOv5s models of each image resolution in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> to the Android mobile platform one by one and compare their detection effects. The model files saved after training were first converted into the corresponding files, then the code was debugged in Android studio and finally deployed to the Android mobile platform, and the test machine used in this experiment was a Redmi 5 Plus (Android Version:8.1.0; GPU Model: Adreno 506; Operating Memory:3GB; Storage Capacity:32GB). After deployment, the Android Package Kit (APK) for each resolution had an application size of 1.62 GB and required 16.38 KB of user data.</p>
<p>In this experiment, 20 original images of &#x2018;Zhongmiansuo49&#x2019; cultivar plants from the image data collected in 2018 and 2019 were reselected for mobile platform detection, and the results of the mobile platform detection for different image resolutions are shown in <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>; <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>. The total number of leaves detected at different resolutions were between 35 and 50. The confusion matrix indicates whether the model confounds different categories by comparing the actual infestation grades of the blades in the mobile platform test data with the predicted infestation grades. As shown in <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>, when the image resolution was 256&#xd7;256, the accuracy of the mobile platform was the highest, at 81.0%, and the detection speed was also the fastest, with an FPS value of 6.98. Therefore, the detection performance of the mobile platform was the best at this image resolution.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Confusion matrix of detection results for different image resolutions.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g008.tif"/>
</fig>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Detection results of the mobile platform at different image resolutions.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Image size</th>
<th valign="middle" align="center">Accuracy (%)</th>
<th valign="middle" align="center">FPS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1024&#xd7;1024</td>
<td valign="middle" align="center">80.6</td>
<td valign="middle" align="center">6.76</td>
</tr>
<tr>
<td valign="middle" align="center">640&#xd7;640</td>
<td valign="middle" align="center">75.6</td>
<td valign="middle" align="center">6.53</td>
</tr>
<tr>
<td valign="middle" align="center">512&#xd7;512</td>
<td valign="middle" align="center">79.5</td>
<td valign="middle" align="center">6.79</td>
</tr>
<tr>
<td valign="middle" align="center">256&#xd7;256</td>
<td valign="middle" align="center">81.0</td>
<td valign="middle" align="center">6.98</td>
</tr>
<tr>
<td valign="middle" align="center">128&#xd7;128</td>
<td valign="middle" align="center">71.4</td>
<td valign="middle" align="center">6.87</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Evaluation of model performance in the field environment</title>
<p>Separately selecting 20 cotton images from the &#x2018;Zhongmiansuo49&#x2019; and &#x2018;Xinluzhong66&#x2019; cultivars from the new data collected in 2022, the accuracy and usefulness of the YOLOv5 model, which had been successfully deployed on the Android platform, were evaluated. The test results are shown in <xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref>; <xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref>, and an example of the detection results is shown in <xref ref-type="fig" rid="f10">
<bold>Figure&#xa0;10</bold>
</xref>.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Confusion matrix of the detection results for infested leaves of two different cultivars: <bold>(A)</bold> &#x2018;Zhongmiansuo49&#x2019;, <bold>(B)</bold> &#x2018;Xinluzhong66&#x2019;.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g009.tif"/>
</fig>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Mobile platform detection results for infested leaves of two different cultivars.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Cultivars</th>
<th valign="middle" align="center">Accuracy (%)</th>
<th valign="middle" align="center">FPS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">&#x2018;Zhongmiansuo49&#x2019;</td>
<td valign="middle" align="center">84.6</td>
<td valign="middle" align="center">8.61</td>
</tr>
<tr>
<td valign="middle" align="left">&#x2018;Xinluzhong66&#x2019;</td>
<td valign="middle" align="center">85.2</td>
<td valign="middle" align="center">8.19</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Examples of detection results for infested leaves of two different cultivars <bold>(A)</bold> &#x2018;Zhongmiansuo49&#x2019;, <bold>(B)</bold> &#x2018;Xinluzhong66&#x2019;.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1200901-g010.tif"/>
</fig>
<p>By comparing the actual infestation levels and predicted infestation levels of the leaves in the mobile platform test data (<xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref>), it can be seen that the adjacent levels are easily confused with each other, and that level 3 is confused the least, probably because the leaf features at levels 0 and 1 are similar, and that the curl of the leaves at level 3 is very obvious and easy to judge. As presented in <xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref>, the mobile-side detection accuracy for infested leaves of the &#x2018;Zhongmiansuo49&#x2019; cultivar was 84.6%, and the detection speed FPS value was 8.61; the mobile-side detection accuracy for infested leaves of the &#x2018;Xinluzhong66&#x2019; cultivar was 85.2%, and the detection speed FPS value was 8.19. As shown in <xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref>, the accuracy and usefulness of the YOLOv5 model deployed on the Android platform are relatively ideal and can provide a more convenient and faster means for field investigators to use the model.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>For <italic>A. gossypii</italic> infestation class identification, other researchers have more often studied multispectral (<xref ref-type="bibr" rid="B42">Zeng et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B43">Fu et&#xa0;al., 2022</xref>) and hyperspectral (<xref ref-type="bibr" rid="B44">Feng and Liu, 2020</xref>) cotton images collected by unmanned aerial vehicle (UAV), while our study was based on cotton images collected by smartphones. The methods of the two approaches are different; the cotton images collected by UAV can qualitatively determine the degree of <italic>A. gossypii</italic> infestation at a macro level, with the degree of <italic>A. gossypii</italic> infestation reflected by the spectral curve characteristics of the cotton canopy, while the cotton images taken by smartphones in this paper can quantitatively determine the degree of <italic>A. gossypii</italic> infestation of a single cotton plant, with more accurate results. The object detection models constructed in this study were all able to recognize <italic>A. gossypii</italic> infestation levels, the mAP value for the best YOLOv5 model reached 96.8%, and its FPS value reached 71.43.</p>
<p>After comparing the test results of the three models, it was found that the mAP values of the SSD model were much lower than those of the other two models. By reviewing the model debugging details, it appears that this result may be due to the use of a static learning rate or an excessively high learning rate setting. To compare the three models, the learning rate must be uniformly set to a static learning rate; thus, the same learning rate was set for all three models in this paper.</p>
<p>The objectives of this study were to compensate for the shortcomings of traditional <italic>A. gossypii</italic> survey methods, to enhance the efficiency of <italic>A. gossypii</italic> infestation detection and to expand the application of object detection algorithms. Most of the current field applications of pesticides in production are quantitative, which can lead to overuse of pesticides, thus increasing production costs and simultaneously causing environmental pollution. The best model identified in this study achieves real-time and rapid recognition of the degree of infestation of <italic>A. gossypii</italic> to help mitigate the abovementioned problem. Successfully deploying the model to the mobile platform, and subsequently deploying the model to plant protection UAVs and pesticide application tractors to establish a precision pesticide application technology system for controlling <italic>A. gossypii</italic> infestation will provide technical support for precise pesticide application, which will enhance the utilization rate of pesticides, reduce the cost of agricultural production and improve the ecological conditions of the environment.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>This study used smartphones to quickly and easily collect images of cotton seedlings. Three classical object detection models were constructed to achieve fast recognition of <italic>A. gossypii</italic> infestation levels. The three models were tested, and it was found that the YOLOv5 model had the best performance, with mAP values 8.3% and 34.2% higher than those of the Faster R-CNN and SSD models, respectively, and FPS values that were 51.29 and 54.09 higher than those of the Faster R-CNN and SSD models, respectively, with higher precision and faster detection speed. Based on further testing guided by these results, it was determined that the comprehensive performance of the YOLOv5s model was better than that of the YOLOv5x model at different image resolutions, and that the best performance was achieved when the image resolution was 640&#xd7;640. Moreover, the comparison with the latest YOLOv8s showed that the YOLOv5s performed better than the YOLOv8s. Regarding detection speed and mobility, we successfully deployed the YOLOv5s model to the Android mobile platform, and after testing, it was found that the detection effect on mobile was the best when the image resolution was 256&#xd7;256. The accuracy was 0.4%, 5.4%, 1.5%, and 9.6% higher at this image resolution than at several other resolutions, and the FPS values were 0.22, 0.45, 0.19, and 0.11 higher than at the other image resolutions, respectively. In addition to images from the &#x2018;Zhongmiansuo49&#x2019; cultivar, the model in this study was also tested on images from the &#x2018;Xinluzhong66&#x2019; cultivar, with a final accuracy of 85.2% and an FPS value of 8.19, indicating that the <italic>A. gossypii</italic> infestation level recognition model presented in this paper can be used for the detection of this pest in other cotton cultivars. The <italic>A. gossypii</italic> infestation level recognition model established in this study can provide a faster and more convenient technical means for <italic>A. gossypii</italic> infestation monitoring, preventing the outbreak of the insect pest in advance and achieving precise prevention and control, which in turn can help enhance the yield and quality of cotton.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>All authors have made significant contributions to this research. XX, YL and HQ conceived the ideas and designed the methodology. JS, QH and YC conducted the experiments. XX and JS performed the data acquisition and processed and analyzed the data. LL, TS, RD, YL and HQ performed the supervision. XX, JS and HQ wrote and edited the paper. All authors discussed and wrote the manuscript and gave final approval for publication. YL and HQ acquired the funding.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>This study was supported by the Xinjiang Joint Funds of the National Natural Science Foundation of China (U2003119) and China National Key R&amp;D Program during the 14th Five-year Plan Period (2022YFD1400302).</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmad</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y. Y.</given-names>
</name>
<name>
<surname>Yue</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep learning based detector YOLOv5 for identifying insect pests</article-title>. <source>Appl. Sci.</source> <volume>12</volume>, <fpage>10167</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app121910167</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arnal</surname> <given-names>B. J. G.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Digital image processing techniques for detecting, quantifying, and classifying plant diseases</article-title>. <source>SpringerPlus.</source> <volume>2</volume>, <fpage>660</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/2193-1801-2-660</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="other">GB/T 15799-2011, rules for monitoring and forecast of the cotton aphid (<italic>Aphis gossypii</italic> Glover). </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chu</surname> <given-names>P. Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z. J.</given-names>
</name>
<name>
<surname>Lammers</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>R. F.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Deep learning-based apple detection using a suppression mask R-CNN</article-title>. <source>Pattern Recogn. Lett.</source> <volume>147</volume>, <fpage>206</fpage>&#x2013;<lpage>211</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.patrec.2021.04.022</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Jiao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>CRA-Net: A channel recalibration feature pyramid network for detecting small pests</article-title>. <source>Comput. Electron. Agr.</source> <volume>191</volume>, <fpage>106518</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106518</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname> <given-names>Q. L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L. P.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>G. Y.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>M. F.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Field effects of 5% acetamiprid on <italic>Aphis gossypii</italic> Glover</article-title>. <source>J. Shanxi Agr. Sci.</source> <volume>41</volume>, <fpage>89</fpage>&#x2013;<lpage>91</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1002-2481.2013.01.21</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feng</surname> <given-names>Y. A.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>W. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Research on the hyperspectral image recognition method based on deep learning</article-title>. <source>Basic. Clin. Pharmacol.</source> <volume>126</volume>, <fpage>32</fpage>&#x2013;<lpage>33</lpage>.</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname> <given-names>H. C.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>H. Q.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y. F.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z. H.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S. J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Cotton aphid infestation monitoring using Sentinel-2 MSI imagery coupled with derivative of ratio spectroscopy and random forest algorithm</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.1029529</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname> <given-names>Y. H.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>H. L.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Yoo</surname> <given-names>S. J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Image-based hot pepper disease and pest diagnosis using transfer learning and fine-tuning</article-title>. <source>Front. Plant Sci.</source> <volume>12</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2021.724487</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>K. M.</given-names>
</name>
<name>
<surname>Gkioxari</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Doll&#xe1;r</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Mask R-CNN</article-title>,&#x201d; in <conf-name>International Conference on Computer Vision</conf-name>, <conf-loc>Venice, Italy</conf-loc>, <conf-date>2017 Oct 22-29</conf-date>. <fpage>2980</fpage>&#x2013;<lpage>2988</lpage>.</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Herron</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Powis</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Rophail</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Baseline studies and preliminary resistance survey of Australian populations of cotton aphid <italic>Aphis gossypii</italic> Glover (Hemiptera: Aphididae)</article-title>. <source>Aust. J. Entomol.</source> <volume>39</volume>, <fpage>33</fpage>&#x2013;<lpage>38</lpage>. doi: <pub-id pub-id-type="doi">10.1046/j.1440-6055.2000.00134.x</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>R. J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>G. M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>D. Y.</given-names>
</name>
<name>
<surname>Qian</surname> <given-names>Y. Q.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>The identification of corn leaf diseases based on transfer learning and data augmentation</article-title>,&#x201d; in <conf-name>Proceedings of the 2020 3rd International Conference on Computer Science and Software Engineering</conf-name>, <conf-loc>Beijing, China</conf-loc>, <conf-date>May 22-24;</conf-date>. <fpage>64</fpage>&#x2013;<lpage>71</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Adaptive feature fusion pyramid network for multi-classes agricultural pest detection</article-title>. <source>Comput. Electron. Agr.</source> <volume>195</volume>, <fpage>106827</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.106827</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Jocher</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). Available at: <uri xlink:href="https://github.com/ultralytics/YOLOv5">https://github.com/ultralytics/YOLOv5</uri>.
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khan</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Akram</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sharif</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Javed</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Raza</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Saba</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>An automated system for cucumber leaf diseased spot detection and classification using improved saliency method and deep features selection</article-title>. <source>Multimed. Tools Appl.</source> <volume>79</volume>, <fpage>18627</fpage>&#x2013;<lpage>18656</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11042-020-08726-8</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>G. J.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>X. J.</given-names>
</name>
<name>
<surname>Ai</surname> <given-names>J. Y.</given-names>
</name>
<name>
<surname>Yi</surname> <given-names>Z. R.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Lemon-YOLO: An efficient object detection method for lemons in the natural environment</article-title>. <source>Pest. Manage. Sci.</source> <volume>15</volume>, <fpage>1998</fpage>&#x2013;<lpage>2009</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1049/ipr2.12171</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Anguelov</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Erhan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Szegedy</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Reed</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>C. Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). &#x201c;<article-title>SSD: Single shot multibox detector</article-title>,&#x201d; in <conf-name>European Conference on Computer Vision</conf-name>. <fpage>21</fpage>&#x2013;<lpage>37</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X. W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Early recognition of tomato gray leaf spot disease based on MobileNetv2-YOLOv3 model</article-title>. <source>Plant Methods</source> <volume>16</volume>, <fpage>83</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13007-020-00624-2</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X. W.</given-names>
</name>
<name>
<surname>Miao</surname> <given-names>W. Q.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>G. X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Tomato pest recognition algorithm based on improved YOLOv4</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.814681</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y. J.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Advances in identification of food diseases and insect pests</article-title>. <source>Food Sci. Technol. Econ.</source> <volume>44</volume>, <fpage>72</fpage>&#x2013;<lpage>73</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.16465/j.gste.cn431252ts.20190415</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>X. Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H. Q.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P. L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A technical regulation for integrated control of cotton aphids in Xinjiang</article-title>. <source>China Cotton.</source> <volume>49</volume>, <fpage>38</fpage>&#x2013;<lpage>41</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname> <given-names>S. H.</given-names>
</name>
<name>
<surname>Ye</surname> <given-names>S. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Using an image segmentation and support vector machine method for identifying two locust species and instars</article-title>. <source>J. Integr. Agr.</source> <volume>19</volume>, <fpage>1301</fpage>&#x2013;<lpage>1313</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2095-3119(19)62865-0</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname> <given-names>J. Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>X. L.</given-names>
</name>
<name>
<surname>Lv</surname> <given-names>L. M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L. J.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>J. C.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Research progress of cotton insect pests in China in recent ten years</article-title>. <source>Cotton Sci.</source> <volume>29</volume>, <fpage>100</fpage>&#x2013;<lpage>112</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.11963/1002-7807.ljycjj.20170825</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mallick</surname> <given-names>M. T.</given-names>
</name>
<name>
<surname>Biswas</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Das</surname> <given-names>A. K.</given-names>
</name>
<name>
<surname>Saha</surname> <given-names>H. N.</given-names>
</name>
<name>
<surname>Chakrabarti</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Deb</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Deep learning based automated disease detection and pest classification in Indian mung bean</article-title>. <source>Multimed. Tools Appl.</source> <volume>82</volume>, <fpage>12017</fpage>&#x2013;<lpage>12041</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11042-022-13673-7</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nabilah</surname> <given-names>A. L. N.</given-names>
</name>
<name>
<surname>Roff</surname> <given-names>M. N. M.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>S. W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Integrated analysis of machine learning and deep learning in chili pest and disease identification</article-title>. <source>J. Sci. Food. Agr.</source> <volume>101</volume>, <fpage>3582</fpage>&#x2013;<lpage>3594</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/jsfa.10987</pub-id>
</citation>
</ref>
<ref id="B1">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>National Bureau of Statistics</collab>
</person-group> (<year>2022</year>) <source>Announcement on cotton production in 2022</source>. Available at: <uri xlink:href="http://www.stats.gov.cn/xxgk/sjfb/zxfb2020/202212/t20221227_1891259.html">http://www.stats.gov.cn/xxgk/sjfb/zxfb2020/202212/t20221227_1891259.html</uri>.
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qi</surname> <given-names>J. T.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X. N.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>F. R.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>X. L.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>An improved YOLOv5 model based on visual attention mechanism: Application to recognition of tomato virus disease</article-title>. <source>Comput. Electron. Agr.</source> <volume>194</volume>, <fpage>106780</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.106780</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Divvala</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>You only look once: unified, real-time object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>Las Vegas, NV, USA</conf-loc>, <conf-date>2016 June 27-30</conf-date>. <fpage>779</fpage>&#x2013;<lpage>788</lpage>.</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname> <given-names>S. Q.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K. M.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Faster R-CNN: towards real-time object detection with region proposal networks</article-title>. <source>IEEE T. Pattern Anal.</source> <volume>39</volume>, <fpage>1137</fpage>&#x2013;<lpage>1149</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TPAMI.2016.2577031</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Fast R-CNN</article-title>. <source>Comput. Sci.</source> <volume>9</volume>, <fpage>10</fpage>&#x2013;<lpage>19</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1504.08083</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname> <given-names>Y. F.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>H. L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J. T.</given-names>
</name>
<name>
<surname>Jian</surname> <given-names>F. J.</given-names>
</name>
<name>
<surname>Jayas</surname> <given-names>D. S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Detection of stored-grain insects using deep learning</article-title>. <source>Comput. Electron. Agr.</source> <volume>145</volume>, <fpage>319</fpage>&#x2013;<lpage>325</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2017.11.039</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>H. N.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>H. W.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>He</surname> <given-names>J. R.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H. X.</given-names>
</name>
<name>
<surname>Geng</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>MEAN-SSD: A novel real-time detector for apple leaf diseases using improved light-weight convolutional neural networks</article-title>. <source>Comput. Electron. Agr.</source> <volume>189</volume>, <fpage>106379</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2021.106379</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>Tzutalin</collab>
</person-group> (<year>2015</year>) <source>LabelImg. Git code</source>. Available at: <uri xlink:href="https://github.com/tzutalin/labelImg">https://github.com/tzutalin/labelImg</uri>.
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>D. W.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>L. M.</given-names>
</name>
<name>
<surname>Ni</surname> <given-names>J. G.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>J. Y.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>H. F.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Z. Z.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Recognition pest by image-based transfer learning</article-title>. <source>J. Sci. Food. Agr.</source> <volume>99</volume>, <fpage>4524</fpage>&#x2013;<lpage>4531</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/jsfa.9689</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>R. J.</given-names>
</name>
<name>
<surname>Jiao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>S-RPN: Sampling-balanced region proposal network for small crop pest detection</article-title>. <source>Comput. Electron. Agr.</source> <volume>187</volume>, <fpage>106290</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2021.106290</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X. W.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>X. N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Early real-time detection algorithm of tomato diseases and pests in the natural environment</article-title>. <source>Plant Methods</source> <volume>17</volume>, <fpage>43</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13007-021-00745-2</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>X. H.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>J. F.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Tan</surname> <given-names>W. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identification of crop diseases using improved convolutional neural networks</article-title>. <source>IET Comput. Vis.</source> <volume>14</volume>, <fpage>538</fpage>&#x2013;<lpage>545</lpage>. doi: <pub-id pub-id-type="doi">10.1049/iet-cvi.2019.0136</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>D. P.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>J. Q.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Long</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H. B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Classification of crop pests based on multi-scale feature fusion</article-title>. <source>Comput. Electron. Agr.</source> <volume>194</volume>, <fpage>106736</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.106736</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>D. G.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Review of typical object detection algorithms for deep learning</article-title>. <source>Comput. Eng. Appl.</source> <volume>57</volume>, <fpage>10</fpage>&#x2013;<lpage>25</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3778/j.issn.1002-8331.2012-0449</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>An evaluation of statistical approaches to text categorization</article-title>. <source>Inform. Retrieval.</source> <volume>1</volume>, <fpage>69</fpage>&#x2013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.1023/A:1009982220290</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname> <given-names>Z. M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W. F.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W. B.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Target classification algorithms based on multispectral imaging: A review</article-title>,&#x201d; in <conf-name>ICMIP 2021: 2021 6th International Conference on Multimedia and Image Processing</conf-name>, <conf-loc>Zhuhai, China. New York, ACM</conf-loc>, <conf-date>2021 Jan 8-10</conf-date>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>F. T.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>J. Z.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>S. Q.</given-names>
</name>
<name>
<surname>Zhai</surname> <given-names>Z. F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Automatic image segmentation method for cotton leaves with the disease under natural environment</article-title>. <source>J. Integr. Agr.</source> <volume>17</volume>, <fpage>1800</fpage>&#x2013;<lpage>1814</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2095-3119(18)61915-X</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>K. K.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Q. F.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y. P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Detecting soybean leaf disease from synthetic image using multi-feature fusion faster R-CNN</article-title>. <source>Comput. Electron. Agr.</source> <volume>183</volume>, <fpage>106064</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2021.106064</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>S. Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J. Z.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Multiple disease detection method for greenhouse-cultivated strawberry based on multiscale feature fusion Faster R_CNN</article-title>. <source>Comput. Electron. Agr.</source> <volume>199</volume>, <fpage>107176</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.107176</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>