<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2024.1365155</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Multi-object detection and behavior tracking of sea cucumbers with skin ulceration syndrome based on deep learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Ge</surname><given-names>Fengli</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xuan</surname><given-names>Kui</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Lou</surname><given-names>Peng</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>*</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname><given-names>Juan</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1998256"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jiang</surname><given-names>Lingxu</given-names>
</name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname><given-names>Jiasheng</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lin</surname><given-names>Qi</given-names>
</name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>College of Mechanical and Electrical Engineering, Qingdao Agricultural University</institution>, <addr-line>Qingdao</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>College of Management, Qingdao Agricultural University</institution>, <addr-line>Qingdao</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>College of Marine Science and Engineering, Qingdao Agricultural University</institution>, <addr-line>Qingdao</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Fisheries Research Institute of Fujian, Key Laboratory of Cultivation and High-Value Utilization of Marine Organisms in Fujian Province</institution>, <addr-line>Xiamen</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: David Alberto Salas de Le&#xf3;n, National Autonomous University of Mexico, Mexico</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Xuebo Zhang, Northwest Normal University, China</p>
<p>Ming Guo, Ningbo University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Juan Li, <email xlink:href="mailto:lijuan291@sina.com">lijuan291@sina.com</email>; Peng Lou, <email xlink:href="mailto:loupeng@qau.edu.cn">loupeng@qau.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>03</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>11</volume>
<elocation-id>1365155</elocation-id>
<history>
<date date-type="received">
<day>03</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>02</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Ge, Xuan, Lou, Li, Jiang, Wang and Lin</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Ge, Xuan, Lou, Li, Jiang, Wang and Lin</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Skin ulceration syndrome (SUS) of sea cucumbers is one of the most serious diseases in intensive aquaculture, and detecting the abnormal behavior of sea cucumbers in time and taking corresponding measures is the most effective way of preventing the spread of this disease. However, multi-object detection and tracking remains a difficult problem in sea cucumber behavior analysis. To solve this problem, this paper first proposes a novel one-stage algorithm, SUS-YOLOv5, for multi-object detection and tracking of sea cucumbers. The proposed SUS-YOLOv5 optimizes the non-maximum suppression of overlapping detection boxes. Next, the SE-BiFPN feature fusion structure is proposed to enhance the transmission of feature information between the deep and shallow layers of the network. Then, an MO-Tracking algorithm integrated with DeepSORT is proposed to achieve real-time multi-object tracking. Experimental results show that the <italic>mAP</italic>@0.5 and <italic>mAP</italic>@0.5:0.95 of the proposed object detector reach 95.40% and 83.80%, respectively, which are 3.30% and 4.10% higher than those of the original YOLOv5s. Compared with the traditional SSD, YOLOv3, and YOLOv4, the <italic>mAP</italic> of SUS-YOLOv5 is improved by 5.49%, 1.57%, and 3.76%, respectively. This research realizes multi-object detection and tracking, laying a foundation for the prediction of skin ulceration syndrome in sea cucumbers, and has practical application value for improving the intelligence level of aquaculture.</p>
</abstract>
<kwd-group>
<kwd>deep learning</kwd>
<kwd>sea cucumber</kwd>
<kwd>skin ulceration syndrome</kwd>
<kwd>YOLOv5S</kwd>
<kwd>object detection</kwd>
<kwd>behavior analysis</kwd>
<kwd>object tracking</kwd>
<kwd>artificial intelligence (AI)</kwd>
</kwd-group>
<counts>
<fig-count count="14"/>
<table-count count="6"/>
<equation-count count="16"/>
<ref-count count="49"/>
<page-count count="16"/>
<word-count count="7976"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Ocean Observation</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Sea cucumbers have high edible and medicinal value and are a precious seafood (<xref ref-type="bibr" rid="B49">Zhu et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B15">Li et&#xa0;al., 2021</xref>). According to the Food and Agriculture Organization of the United Nations, world production of sea cucumbers reached 40,000 tons in 2020 (<xref ref-type="bibr" rid="B5">FAO, 2022</xref>). In recent years, with the improvement of living standards, the demand for sea cucumbers has been increasing, and the farming area of sea cucumbers has grown accordingly. However, intensive aquaculture faces problems such as slow growth and a high incidence of disease (<xref ref-type="bibr" rid="B36">Wang et&#xa0;al., 2021</xref>). Among the diseases of sea cucumbers, skin ulceration syndrome (SUS) is one of the most serious; it is caused by infection with <italic>Vibrio alginolyticus</italic>. <italic>Vibrio alginolyticus</italic> is characterized by rapid and widespread infection (<xref ref-type="bibr" rid="B23">Lv et&#xa0;al., 2019</xref>), which can cause significant economic losses if farmers cannot detect SUS in time and take corresponding measures. Therefore, the prevention of SUS is of great significance in the cultivation of sea cucumbers.</p>
<p>Recently, deep learning technology has been widely applied in various fields (<xref ref-type="bibr" rid="B16">Li et&#xa0;al., 2023</xref>), including tea bud detection (<xref ref-type="bibr" rid="B38">Xu et&#xa0;al., 2022</xref>), crop yield estimation (<xref ref-type="bibr" rid="B11">Hu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B6">Gao et&#xa0;al., 2022</xref>), underwater image enhancement (<xref ref-type="bibr" rid="B7">Guo et&#xa0;al., 2020</xref>), industrial control (<xref ref-type="bibr" rid="B47">Zhao et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B22">Liu et&#xa0;al., 2021</xref>), human behavior detection (<xref ref-type="bibr" rid="B41">Yu and Lee, 2015</xref>), and new energy (<xref ref-type="bibr" rid="B24">Ma et&#xa0;al., 2022</xref>). In particular, with the development of intelligent aquaculture, some scholars have started to use artificial intelligence technology to diagnose diseases in aquaculture (<xref ref-type="bibr" rid="B14">Li and Du, 2022</xref>). For example, a fish disease diagnosis system was developed based on image processing technology (<xref ref-type="bibr" rid="B25">Park et&#xa0;al., 2007</xref>); it extracts pathogenic areas from microscopic images of infected fish tissue and matches them quickly against a database. A 3D computer vision system was established to monitor the movement of fish and to control dissolved oxygen in aquaculture based on breeding experience (<xref ref-type="bibr" rid="B2">Bao et&#xa0;al., 2018</xref>). Deep learning has also been used to detect fish in an abnormal environment with high levels of ammonia nitrogen and to plot 3D behavioral trajectories of the fish, providing a new approach for animal behavior analysis (<xref ref-type="bibr" rid="B39">Xu et&#xa0;al., 2020</xref>). <xref ref-type="bibr" rid="B17">Li et&#xa0;al. (2020)</xref> propose a deep learning framework based on Faster R-CNN for detecting, localizing, and analyzing sea cucumber behavior trajectories, providing important information for sea cucumber cultivation, status monitoring, and early disease warning. These and related studies indicate that timely detection of the abnormal behavior of underwater animals, followed by corresponding preventive measures, is of great significance for ensuring the healthy and sustainable development of aquaculture. However, existing object tracking approaches for sea cucumbers focus on a single object, and to our knowledge there has been no research on object tracking and behavior analysis for multiple sea cucumbers.</p>
<p>The key to object tracking and behavior analysis is object detection. Underwater object detection based on deep learning has therefore become an important research direction, and some scholars have actively explored this field. For example, <xref ref-type="bibr" rid="B19">Liu et&#xa0;al. (2020)</xref> propose a real-time method, combining image processing and deep learning, to monitor marine organism invasion; it calculates marine organism density and detects and measures marine organism species, providing a warning reference for biological invasion in coastal areas. <xref ref-type="bibr" rid="B37">Xu et&#xa0;al. (2023)</xref> propose a multi-object behavioral tracking method based on automatic frame coordinate matching, which tracks multiple sea cucumbers and calculates their motions for quantitative and qualitative behavioral analysis. <xref ref-type="bibr" rid="B45">Zhang et&#xa0;al. (2020)</xref> propose a deep residual network with multiple forms and a stochastic gradient descent (SGD) training algorithm for recognizing sea cucumbers on the seabed, achieving an average accuracy of 97.90%, although the network cannot reach high accuracy in complex scenarios. To attain accurate quantitative detection of benthic animals, <xref ref-type="bibr" rid="B21">Liu and Wang (2021)</xref> propose a quantitative detection algorithm for marine benthic animals based on Faster R-CNN, which improves recognition accuracy from 93.25% to 96.32% and provides a new way to quantitatively detect small and dense objects on the seabed. <xref ref-type="bibr" rid="B43">Zeng et&#xa0;al. (2021)</xref> propose adding an adversarial occlusion network to the basic Faster R-CNN detection algorithm, which effectively prevents the detection network from over-fitting to generated fixed features and improves the mean average precision (<italic>mAP</italic>) of object detection by 4.2% in complex underwater environments. Nevertheless, the above research on object recognition and detection does not take into account the influence of noise, lighting, and other factors, which affect the generalization ability of a model. More importantly, research on sea cucumbers with SUS using deep learning technology is still at the exploratory stage.</p>
<p>To solve the above problems, the behavior of sea cucumbers infected by <italic>Vibrio alginolyticus</italic> is studied in a laboratory environment, and a new approach called SUS-YOLOv5 is proposed in this paper. The main contributions are fourfold: (1) to address the fact that Non-Maximum Suppression (NMS) suppresses candidates based only on the overlapping area of detection boxes, which leads to false suppression, this paper proposes an improved YOLOv5s algorithm with Soft Non-Maximum Suppression (Soft-NMS), which applies different degrees of penalty to candidate boxes with different degrees of overlap; (2) to address the information loss caused by the differing importance of feature-map channels during the convolution and pooling process, the SUS-YOLOv5 approach is proposed to enhance the representation of feature information; (3) the SE-BiFPN feature fusion structure is proposed to enhance the transfer of feature information between the deep and shallow layers of the model, which reduces the loss of feature information and improves detection accuracy; and (4) the proposed SUS-YOLOv5, combined with the DeepSORT algorithm, achieves real-time multi-object tracking, and the mean movement quantity under different infection conditions is computed and analyzed.</p>
<p>This research is structured as follows: Section 2 briefly describes the experimental materials and outlines the proposed approach. Section 3 introduces the experimental platform and the evaluation criteria and offers a comprehensive analysis of the experimental results. Section 4 delves into the challenges encountered during the research and discusses avenues for future research. Finally, Section 5 concludes this study, summarizing the key findings and highlighting their significance.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Proposed method</title>
<p>In the YOLO (You Only Look Once) series, YOLOv1 lays the foundation for the entire family, and the later versions improve on the first (<xref ref-type="bibr" rid="B29">Redmon et&#xa0;al., 2016</xref>). YOLOv1 innovatively uses a one-stage structure to accomplish classification and object localization, but it has a small receptive field and its loss function is not specific enough (<xref ref-type="bibr" rid="B1">Ahmad et&#xa0;al., 2020</xref>). YOLOv2 introduces batch normalization and removes the fully connected layer to further improve model performance (<xref ref-type="bibr" rid="B31">Shi et&#xa0;al., 2021</xref>). YOLOv3 adds detection box prediction to YOLOv2 and uses Darknet-53 to extract features (<xref ref-type="bibr" rid="B34">Tian et&#xa0;al., 2019</xref>). Building on this architecture, YOLOv4 incorporates many optimizations in data processing, backbone network training, activation functions, loss functions, and more (<xref ref-type="bibr" rid="B8">Guo et&#xa0;al., 2021</xref>). YOLOv5 makes further improvements on YOLOv4, so that both its speed and accuracy are greatly improved (<xref ref-type="bibr" rid="B42">Yuan et&#xa0;al., 2022</xref>).</p>
<p>YOLOv5 contains five network structures, i.e., YOLOv5n, YOLOv5s, YOLOv5m, YOLOv5l, and YOLOv5x, whose depth and width are progressively increased (<xref ref-type="bibr" rid="B28">Qu et&#xa0;al., 2022</xref>). However, as the complexity of the YOLOv5 series increases, detection accuracy improves while detection speed decreases and hardware requirements become higher. Among the five variants, YOLOv5s offers good real-time detection performance and saves training and deployment costs.</p>
<p>To improve the detection performance on sea cucumbers, the SUS-YOLOv5 is proposed based on YOLOv5s in this research, and the network structure of SUS-YOLOv5 is shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Structure of SUS-YOLOv5.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g001.tif"/>
</fig>
<p>In <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>, the parts marked with an asterisk are the added or improved components, and the solid red lines indicate newly added connections. The main improvements of SUS-YOLOv5 are as follows.</p>
<sec id="s2_1">
<label>2.1</label>
<title>Optimization of detection boxes</title>
<p>To select the best detection box from a set of overlapping boxes, the NMS used in YOLOv5s considers only the overlapping area of the detection boxes, which often leads to false suppression and reduces detection performance. To solve this problem, this research adds a Soft-NMS module to perform the suppression: the intersection-over-union of two candidate detection boxes is passed through a Gaussian function, and different degrees of penalty are applied accordingly. Soft-NMS uses these penalties to modify the confidence of each candidate so that the best detection box is selected from a set of overlapping boxes.</p>
<p>NMS is mainly used for post-processing the output of the object detection model. In the prediction stage of object detection, many candidate anchor boxes are output, and several of them may overlap around the same object. NMS merges similar detection boxes of the same object, removing redundant boxes to obtain the correct detection result. The NMS processing can be expressed intuitively by the following score reset function,</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:mtable columnalign="left" equalrows="true" equalcolumns="true">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>iou&#xa0;</mml:mtext>
<mml:mo>(</mml:mo>
<mml:mtext mathvariant="italic">&#x39b;</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext mathvariant="italic">&#xa0;&#x394;</mml:mtext>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>)</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&lt;</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>iou&#xa0;</mml:mtext>
<mml:mo>(</mml:mo>
<mml:mtext mathvariant="italic">&#x39b;</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext mathvariant="italic">&#xa0;&#x394;</mml:mtext>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>)</mml:mo>
<mml:mo>&#x2265;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>, <italic>&#x39b;</italic> denotes the detection box with the maximum confidence score, and <italic>i</italic> indexes the remaining boxes, ordered from high score to low and excluding <italic>&#x39b;</italic>. <italic>&#x394;<sub>i</sub></italic> is the <italic>i</italic>th remaining detection box, <italic>s<sub>i</sub></italic> is its confidence score, and <italic>N<sub>t</sub></italic> stands for the IoU threshold.</p>
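<p>For illustration, the hard score-reset rule of <xref ref-type="disp-formula" rid="eq1">Equation 1</xref> can be sketched in Python as below. This is a minimal sketch for the reader, not the implementation used in this study; the IoU helper and the threshold value of 0.5 are assumptions.</p>
<preformat>
import numpy as np

def iou(box_a, box_b):
    # Boxes are [x1, y1, x2, y2]; returns the intersection-over-union ratio.
    x1 = max(box_a[0], box_b[0]); y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2]); y2 = min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def hard_nms(boxes, scores, nt=0.5):
    # Equation 1: keep the top-scoring box Lambda and reset to 0 the score of
    # any remaining box Delta_i whose IoU with Lambda reaches the threshold N_t.
    order = np.argsort(scores)[::-1]
    suppressed = np.zeros(len(scores), dtype=bool)
    keep = []
    for idx in order:
        if suppressed[idx]:
            continue
        keep.append(idx)
        for j in order:
            if j != idx and not suppressed[j] and iou(boxes[idx], boxes[j]) &gt;= nt:
                suppressed[j] = True   # s_j is reset to 0
    return keep
</preformat>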
<p>However, the main drawback of NMS is that it retains only the highest-confidence prediction box when objects are highly overlapped, which may lead to the deletion of boxes belonging to similar but distinct objects. Therefore, Soft-NMS is introduced to optimize the selection of detection boxes:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:mtable columnalign="left" equalrows="true" equalcolumns="true">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>iou&#xa0;</mml:mtext>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext mathvariant="italic">&#x39b;,&#xa0;&#x394;</mml:mtext>
</mml:mrow>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>)</mml:mo>
<mml:mo>&lt;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>-</mml:mo>
<mml:mtext>iou&#xa0;</mml:mtext>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext mathvariant="italic">&#x39b;,&#xa0;&#x394;</mml:mtext>
</mml:mrow>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>)</mml:mo>
<mml:mo>)</mml:mo>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mtext>iou&#xa0;</mml:mtext>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext mathvariant="italic">&#x39b;,&#xa0;&#x394;</mml:mtext>
</mml:mrow>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>)</mml:mo>
<mml:mo>&#x2265;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>, one can see that Soft-NMS does not directly delete the remaining detection boxes that overlap the highest-confidence box; instead, it reduces their confidence and retains more prediction boxes (<xref ref-type="bibr" rid="B4">Bodla et&#xa0;al., 2017</xref>). In this way, excessive suppression is avoided and overlapping objects are not deleted by mistake. <xref ref-type="disp-formula" rid="eq3"><bold>Equation 3</bold></xref> is then derived as follows:</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mtext>e</mml:mtext>
<mml:msup>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>iou&#xa0;</mml:mtext>
<mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext mathvariant="italic">&#x39b;,&#xa0;</mml:mtext>
<mml:msub>
<mml:mtext mathvariant="italic">&#x394;</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mi>&#x3c2;</mml:mi>
</mml:mfrac>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mtext>,&#xa0;</mml:mtext>
<mml:mo>&#x2200;</mml:mo>
<mml:msub>
<mml:mtext mathvariant="italic">&#x394;</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2209;</mml:mo>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>P</italic> is the final set of anchor boxes, and <italic>&#x3c2;</italic> is the hyperparameter of Soft-NMS.</p>
<p>Unlike NMS, which applies an abrupt penalty as soon as the threshold is reached, the penalty function of Soft-NMS is continuous: it penalizes anchor boxes heavily when they are highly overlapped, applies no penalty when there is no overlap, and increases the penalty gradually as the overlap grows (<xref ref-type="bibr" rid="B4">Bodla et&#xa0;al., 2017</xref>). An example of overlapping detection boxes is shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>An example of overlapping detection boxes.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g002.tif"/>
</fig>
<p>As shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>, there are two detection boxes in this image, highlighted in red and purple, with scores of 0.95 and 0.80, respectively. Notably, the purple detection box overlaps the red one substantially. When using NMS, the choice of threshold is critical, because it determines which bounding boxes are retained and which are suppressed. If the threshold is set too high, too many boxes are retained and false detections increase; if it is set too low, correct detections of adjacent objects may be suppressed, leading to missed detections. Therefore, experimentally determining the optimal threshold is the key to balancing false detections and missed detections. Soft-NMS is an improvement of NMS: it does not suppress overlapping boxes completely but reduces their scores instead, thus avoiding the problem of hard threshold setting.</p>
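<p>As a complementary illustration, the Gaussian re-scoring of <xref ref-type="disp-formula" rid="eq3">Equation 3</xref> can be written as the following minimal Python sketch; the variable names and the hyperparameter value of 0.5 are assumptions, not the settings used in SUS-YOLOv5.</p>
<preformat>
import math

def soft_nms_rescore(score, iou_with_top, sigma=0.5):
    # Equation 3: instead of resetting the score to 0, decay it with a Gaussian
    # of the IoU between this box and the current top-scoring box.
    return score * math.exp(-(iou_with_top ** 2) / sigma)

# Example: a box scoring 0.80 with IoU = 0.6 against the top box keeps a
# reduced score of about 0.80 * exp(-0.36 / 0.5), i.e. roughly 0.39.
print(round(soft_nms_rescore(0.80, 0.6), 2))
</preformat>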
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Extraction of the effective feature of SUS-YOLOv5</title>
<p>In the traditional convolution and pooling process, each channel of the feature map is treated as equally important. In practice, however, different channels contribute to different degrees and should be weighted accordingly. To reduce the information loss caused by ignoring the varying importance of feature-map channels during convolution and pooling, SUS-YOLOv5 incorporates SENet, which enhances the model&#x2019;s ability to capture feature correlations and improves the representation of feature information.</p>
<p>SENet introduces attention over the channel dimension through two key operations, Squeeze and Excitation (<xref ref-type="bibr" rid="B12">Huang et&#xa0;al., 2022</xref>). Through automatic learning, SENet makes the neural network focus on certain feature channels: it strengthens the channels of the feature map that are helpful for the current task and suppresses those that are less useful. SENet brings a significant improvement in model performance with only a slight increase in computing cost (<xref ref-type="bibr" rid="B9">Hu et&#xa0;al., 2018a</xref>, <xref ref-type="bibr" rid="B10">b</xref>).</p>
<p>
<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> shows the schematic diagram of the SENet module. The feature map from the backbone network is input into the SENet attention module. Before SENet attention is added, all channels of the feature map are treated equally regardless of their importance; after SENet is added, the individual feature channels receive different weights. Different colors stand for different weights, which makes the neural network focus on the channels with large weight values.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Schematic diagram of the bacterial liquid preparation process.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g003.tif"/>
</fig>
<p>In <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>, <italic>X</italic> is the input feature map, and <italic>F<sub>ex</sub>
</italic> represents the transformation of the feature map. <italic>U</italic> stands for <italic>C</italic> feature maps with <italic>H &#xd7; W</italic>, and <inline-formula>
<mml:math display="inline" id="im1">
<mml:mover accent="true">
<mml:mi>X</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> represents the feature map scaled by the activation function. <italic>F<sub>sq</sub>
</italic> represents the Squeeze operation, that is, as shown in <xref ref-type="disp-formula" rid="eq4"><bold>Equation 4</bold></xref>:</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Schematic diagram of the SENet module.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g004.tif"/>
</fig>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>W</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:mstyle>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>j</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>z<sub>c</sub>
</italic> represents the output operated by <italic>F<sub>sq</sub>
</italic>, the subscript <italic>c</italic> represents the channel. <italic>u<sub>c</sub>
</italic> stands for the <italic>c</italic>th two-dimensional matrix of <italic>U</italic>. <italic>F<sub>ex</sub>
</italic> in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> is the Excitation operation that corresponds to the two fully connected operations, namely, as shown in <xref ref-type="disp-formula" rid="eq5"><bold>Equation 5</bold></xref>:</p>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>(</mml:mo>
<mml:mi>z</mml:mi>
<mml:mtext>,&#xa0;</mml:mtext>
<mml:mi>W</mml:mi>
<mml:mo>)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi>z</mml:mi>
<mml:mo>)</mml:mo>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>s</italic> is the output operated by <italic>F<sub>ex</sub>
</italic>. <italic>&#x3b4;</italic> and <italic>&#x3c3;</italic> represent the activation function of ReLU and Sigmoid, respectively.</p>
<p>After obtaining <italic>s</italic>, SENet operates <italic>F<sub>scale</sub>
</italic>, that is, by the channel product of <italic>s<sub>c</sub>
</italic> and <italic>u<sub>c</sub>
</italic> the output feature can be obtained, as shown in <xref ref-type="disp-formula" rid="eq6"><bold>Equation 6</bold></xref>:</p>
<disp-formula id="eq6">
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>)</mml:mo>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
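<p>To make the Squeeze, Excitation, and Scale steps of <xref ref-type="disp-formula" rid="eq4">Equations 4</xref> to <xref ref-type="disp-formula" rid="eq6">6</xref> concrete, a PyTorch-style sketch of an SE block is given below. This is an illustrative re-implementation rather than the code of SUS-YOLOv5, and the reduction ratio of 16 is an assumption.</p>
<preformat>
import torch
import torch.nn as nn

class SEBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super().__init__()
        # F_sq: global average pooling over H x W (Equation 4)
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        # F_ex: two fully connected layers with ReLU and Sigmoid (Equation 5)
        self.excite = nn.Sequential(
            nn.Linear(channels, channels // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(channels // reduction, channels),
            nn.Sigmoid(),
        )

    def forward(self, x):
        b, c, _, _ = x.shape
        z = self.squeeze(x).view(b, c)          # z_c in Equation 4
        s = self.excite(z).view(b, c, 1, 1)     # s in Equation 5
        return x * s                            # F_scale: s_c times u_c (Equation 6)

# Usage: rescale a batch of feature maps with 64 channels.
feat = torch.randn(2, 64, 40, 40)
print(SEBlock(64)(feat).shape)   # torch.Size([2, 64, 40, 40])
</preformat>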
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Optimal feature fusion of SUS-YOLOv5</title>
<p>As the network becomes deeper, feature information is lost to some extent. Therefore, multi-scale feature fusion is widely used in detection networks to improve detection performance. Commonly used feature fusion structures include the Feature Pyramid Network (FPN) (<xref ref-type="bibr" rid="B18">Lin et&#xa0;al., 2017</xref>), the Path Aggregation Network (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2018</xref>), and the Bidirectional Feature Pyramid Network (BiFPN) (<xref ref-type="bibr" rid="B33">Tan et&#xa0;al., 2020</xref>). FPN is a top-down feature pyramid structure; although it combines deep and shallow features at multiple scales, it is limited by unidirectional information transmission. The Path Aggregation Network adds a bottom-up secondary fusion path to FPN (<xref ref-type="bibr" rid="B33">Tan et&#xa0;al., 2020</xref>). BiFPN is a weighted bidirectional feature pyramid network that realizes simple and fast multi-scale feature fusion. To reduce the loss of feature information and improve detection accuracy, this research proposes the SE-BiFPN structure, which combines SENet and BiFPN; its diagrammatic sketch is shown in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Schematic diagram of SE-BiFPN.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g005.tif"/>
</fig>
<p>Firstly, multi-scale feature maps are derived from the SENet layers of the backbone network. Secondly, the first feature fusion is performed with the down-sampling layer through a transverse connection, and the second feature fusion combines the down-sampling layer and the up-sampling layer of the same scale through a skip connection. The fused feature map is then obtained after these multi-scale fusions. Taking the fourth feature level as an example, the intermediate feature map and the final feature map are output as follows.</p>
<disp-formula id="eq7">
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>4</mml:mn>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mo>&#x2202;</mml:mo>
<mml:mo>(</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>4</mml:mn>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>(</mml:mo>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>5</mml:mn>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq8">
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>4</mml:mn>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mo>&#x2202;</mml:mo>
<mml:mo>(</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#xb7;</mml:mo>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>4</mml:mn>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#xb7;</mml:mo>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>4</mml:mn>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>3</mml:mn>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#xb7;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>(</mml:mo>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>3</mml:mn>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mn>3</mml:mn>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="eq7">Equations 7</xref> and <xref ref-type="disp-formula" rid="eq8">8</xref>, <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mo>(</mml:mo>
<mml:mtext>g</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes the convolution operation on g. <italic>R</italic>(g) stands for an up-sampling or down-sampling operation on g. <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>4</mml:mn>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represents the intermediate feature map between <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>4</mml:mn>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mn>4</mml:mn>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. <italic>&#x3c9;</italic> denotes a learnable weight, and <italic>&#x3b5;</italic> is a preset small positive value (usually 0.0001) used to avoid numerical instability. The weights <italic>&#x3c9;</italic> are adjusted dynamically by the optimization algorithm during training; their initial values are usually randomly generated and updated by back-propagation of the loss function. The small positive value <italic>&#x3b5;</italic> prevents numerical instability caused by division by 0 or by infinite values during numerical calculation. We choose 0.0001 for <italic>&#x3b5;</italic>, an empirical value widely used in numerous experiments; it is sufficient for numerical stability in most cases without significantly affecting model performance.</p>
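<p>As an illustrative sketch (under assumed tensor shapes; the convolution and resampling operators stand in for the actual SE-BiFPN layers), the normalized weighted fusion of <xref ref-type="disp-formula" rid="eq7">Equation 7</xref> can be expressed as follows.</p>
<preformat>
import torch
import torch.nn as nn
import torch.nn.functional as F

class WeightedFusion2(nn.Module):
    """Fuses two feature maps with learnable non-negative weights (Equation 7)."""
    def __init__(self, channels, eps=1e-4):
        super().__init__()
        self.w = nn.Parameter(torch.ones(2))   # omega_1, omega_2, updated by backprop
        self.eps = eps                         # small epsilon for numerical stability
        self.conv = nn.Conv2d(channels, channels, kernel_size=3, padding=1)

    def forward(self, p4_in, p5_in):
        w = F.relu(self.w)                     # keep the weights non-negative
        p5_up = F.interpolate(p5_in, size=p4_in.shape[-2:], mode="nearest")  # R(.)
        fused = (w[0] * p4_in + w[1] * p5_up) / (w[0] + w[1] + self.eps)
        return self.conv(fused)                # the convolution applied to the fused map
</preformat>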
<p>The SE-BiFPN network enhances the fusion of shallow and deep image feature information. Each new feature layer is processed by SENet, so SE-BiFPN plays a core role in fusing global and local information between the deep and shallow layers. In conclusion, the SE-BiFPN structure enhances the expressive ability of the feature pyramid and enables the model to achieve optimal feature fusion. After SENet processing, the feature maps in the backbone of SUS-YOLOv5 are input into the SE-BiFPN structure multiple times to realize bidirectional multi-scale feature fusion, which improves the model&#x2019;s learning of the whole feature and reduces the miss rate.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Object tracking algorithm</title>
<p>The SUS-YOLOv5 integrates the DeepSORT algorithm to achieve multi-object behavior tracking of sea cucumbers, and the schematic diagram of MO-Tracking is shown in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Schematic diagram of the multi-object tracking process.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g006.tif"/>
</fig>
<p>As illustrated in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>, the DeepSORT algorithm is built on the SORT algorithm (<xref ref-type="bibr" rid="B3">Bewley et&#xa0;al., 2016</xref>) and introduces a re-identification model, which reduces the frequency of identity (ID) switching by using both appearance and motion information. In addition, DeepSORT combines a Kalman filter for motion prediction with the Hungarian algorithm for data association to solve the multi-object tracking problem.</p>
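<p>A minimal, purely illustrative sketch of this detect-then-track loop is given below. The <monospace>detector</monospace> and <monospace>tracker</monospace> objects and their method names are hypothetical stand-ins for SUS-YOLOv5 and DeepSORT, not an actual API.</p>
<preformat>
import cv2

def track_video(video_path, detector, tracker):
    # Hypothetical interfaces: detector.detect(frame) returns (boxes, scores);
    # tracker.update(boxes, scores, frame) returns (track_id, box) pairs.
    cap = cv2.VideoCapture(video_path)
    trajectories = {}                    # maps each track_id to a list of box centers
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        boxes, scores = detector.detect(frame)           # SUS-YOLOv5 detections
        for track_id, (x1, y1, x2, y2) in tracker.update(boxes, scores, frame):
            center = ((x1 + x2) / 2.0, (y1 + y2) / 2.0)
            trajectories.setdefault(track_id, []).append(center)
    cap.release()
    return trajectories
</preformat>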
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Image processing and annotation. <bold>(A)</bold> Original image <bold>(B)</bold> Image after processing. <bold>(C)</bold> Label fabrication.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g007.tif"/>
</fig>
<p>The video of sea cucumbers is input into SUS-YOLOv5. After the object detector detects the sea cucumbers, the DeepSORT algorithm matches the detected objects between consecutive frames using the Kalman filter and the Hungarian algorithm, and it continuously tracks each object. During tracking, the motion model established by DeepSORT calculates the Mahalanobis distance between the detection box and the Kalman-predicted box; this distance is expressed as:</p>
<disp-formula id="eq9">
<label>(9)</label>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:msup>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mtext>T</mml:mtext>
</mml:msup>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>d<sub>j</sub></italic> stands for the position of the <italic>j</italic>th detection box, <italic>y<sub>i</sub></italic> is the predicted position of the <italic>i</italic>th tracked object, and <italic>S<sub>i</sub></italic> stands for the covariance matrix between the detected position and the predicted position. <italic>d</italic><sup>(1)</sup>(<italic>i,j</italic>) denotes the Mahalanobis distance between the Kalman prediction and the detection result for the motion state of an existing moving object.</p>
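<p>For reference, the motion metric of <xref ref-type="disp-formula" rid="eq9">Equation 9</xref> can be computed as in the short sketch below; the detection position, predicted position, and covariance matrix are assumed to be available from the Kalman filter as NumPy arrays.</p>
<preformat>
import numpy as np

def mahalanobis_sq(d_j, y_i, S_i):
    # Equation 9: squared Mahalanobis distance between the detection d_j and
    # the Kalman-predicted position y_i with covariance matrix S_i.
    diff = np.asarray(d_j) - np.asarray(y_i)
    return float(diff.T @ np.linalg.inv(S_i) @ diff)
</preformat>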
<p>When the uncertainty of object motion is low, the Mahalanobis distance in <xref ref-type="disp-formula" rid="eq9">Equation 9</xref> is a suitable association metric. However, when the camera is moving, this association method can fail, resulting in ID switches. To solve this problem, a second association metric is used: an appearance feature vector is computed for each detection box <italic>d<sub>j</sub></italic>, as defined in DeepSORT.</p>
<disp-formula id="eq10">
<label>(10)</label>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:msup>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>j</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi>min</mml:mi>
<mml:mo>{</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>r</mml:mi>
<mml:mi>j</mml:mi>
<mml:mtext>T</mml:mtext>
</mml:msubsup>
<mml:msubsup>
<mml:mi>r</mml:mi>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>|</mml:mo>
<mml:msubsup>
<mml:mi>r</mml:mi>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>r<sub>j</sub>
</italic> is an appearance descriptor with the restriction condition ||<italic>r<sub>j</sub>
</italic>|| = 1. For each track <italic>k</italic>, the algorithm keeps the set <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:msub>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> of the last <italic>L<sub>k</sub>
</italic> = 100 appearance descriptors, namely <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:msub>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:msubsup>
<mml:mi>r</mml:mi>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. <italic>d</italic><sup>(2)</sup>(<italic>i,j</italic>) is the minimal cosine distance between the <italic>i</italic>th track and the <italic>j</italic>th detection.</p>
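<p>A corresponding sketch of the appearance metric in <xref ref-type="disp-formula" rid="eq10">Equation 10</xref> is shown below, assuming the stored descriptors are unit-normalized NumPy vectors (illustrative only, not the DeepSORT source code).</p>
<preformat>
import numpy as np

def min_cosine_distance(track_gallery, detection_descriptor):
    # Equation 10: 1 - r_j^T r_k over the stored descriptors of the track,
    # taking the minimum; all descriptors are assumed to have unit norm.
    gallery = np.asarray(track_gallery)           # shape (L_k, D)
    r_j = np.asarray(detection_descriptor)        # shape (D,)
    return float(np.min(1.0 - gallery @ r_j))
</preformat>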
<p>The two metrics, based on motion features and appearance features respectively, complement each other in the matching problem. On one hand, the Mahalanobis distance on motion characteristics is helpful for short-term prediction. On the other hand, the cosine distance on appearance features is very useful for recovering IDs after prolonged occlusion. To describe the association problem, the two metrics from <xref ref-type="disp-formula" rid="eq9">Equations 9</xref> and <xref ref-type="disp-formula" rid="eq10">10</xref> are weighted and summed as shown in <xref ref-type="disp-formula" rid="eq11">Equation 11</xref>:</p>
<disp-formula id="eq11">
<label>(11)</label>
<mml:math display="block" id="M11">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">&#x3bb;</mml:mi>
<mml:msup>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>j</mml:mi>
<mml:mo>)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="italic">&#x3bb;</mml:mi>
<mml:mo>)</mml:mo>
<mml:msup>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>j</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>c<sub>i,j</sub>
</italic> stands for the weighted sum of the Mahalanobis distance and the cosine distance, and <italic>&#x3bb;</italic> is a hyperparameter that balances the contribution of the two metrics to the association.</p>
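<p>The association cost of <xref ref-type="disp-formula" rid="eq11">Equation 11</xref> is then a simple weighted sum of the two distances, as in the one-line sketch below; the value of <italic>&#x3bb;</italic> shown here is an assumption.</p>
<preformat>
def association_cost(d1, d2, lam=0.5):
    # Equation 11: weighted sum of the Mahalanobis term d1 and the cosine term d2.
    return lam * d1 + (1.0 - lam) * d2
</preformat>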
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Experiment and result analysis</title>
<sec id="s3_1">
<label>3.1</label>
<title>Experimental materials</title>
<sec id="s3_1_1">
<label>3.1.1</label>
<title>Preparation of pathogenic bacteria</title>
<p><italic>Vibrio alginolyticus</italic> is a marine bacterium that is widely present in the seawater environment and can cause lesions in sea cucumbers. Higher concentrations of <italic>Vibrio alginolyticus</italic> may increase the chance of infection in sea cucumbers, especially in aquaculture environments, where confined spaces and high-density culture conditions may accelerate the spread of the pathogen. As the concentration of <italic>Vibrio alginolyticus</italic> increases, its pathogenicity to sea cucumbers usually intensifies. In this research, we use <italic>Vibrio alginolyticus</italic> to infect sea cucumbers. Both the adult sea cucumbers and the <italic>Vibrio alginolyticus</italic> strain are obtained from the laboratory of the College of Marine Science and Engineering at Qingdao Agricultural University. To prepare the bacterial suspension, <italic>Vibrio alginolyticus</italic> is first inoculated into Luria-Bertani broth and incubated in a constant-temperature shaker at 30&#xb0;C and 200 rpm until the logarithmic growth phase. Two bacterial suspensions are prepared, with concentrations of 1 &#xd7; 10<sup>11</sup> CFU/mL and 1 &#xd7; 10<sup>9</sup> CFU/mL, respectively. These suspensions are added to two fish tanks containing 10 L of water each, and the final concentrations obtained are 1 &#xd7; 10<sup>6</sup> CFU/mL and 1 &#xd7; 10<sup>9</sup> CFU/mL, respectively. The research is conducted in three experimental environments: normal environment, low concentration, and high concentration. <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref> shows the preparation process of the bacterial solution.</p>
</sec>
<sec id="s3_1_2">
<label>3.1.2</label>
<title>Sample collection</title>
<p>The water temperature for experimental sea cucumbers is kept at 18&#xb0;C. The weight and size specifications of sea cucumbers are 14.40 &#xb1; 4.20 g/each and 6.65 &#xb1; 1.15 cm/each, respectively. In this research, twelve adult sea cucumber samples are collected. <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref> shows the parameter specifications for each sea cucumber.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Sample specifications of sea cucumbers.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" colspan="2" align="center">Groups<break/>Specifications</th>
<th valign="middle" align="center">Normal environment</th>
<th valign="middle" align="center">Low<break/>concentration</th>
<th valign="middle" align="center">High<break/>concentration</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="2" align="center"><bold>No. 1 Sample</bold>
</td>
<td valign="middle" align="center"><bold>Length (cm)</bold>
</td>
<td valign="middle" align="center">6.00</td>
<td valign="middle" align="center">7.90</td>
<td valign="middle" align="center">5.50</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Weight (g)</bold>
</td>
<td valign="middle" align="center">14.80</td>
<td valign="middle" align="center">18.40</td>
<td valign="middle" align="center">10.20</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center"><bold>No. 2 Sample</bold>
</td>
<td valign="middle" align="center"><bold>Length (cm)</bold>
</td>
<td valign="middle" align="center">5.60</td>
<td valign="middle" align="center">6.50</td>
<td valign="middle" align="center">6.50</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Weight (g)</bold>
</td>
<td valign="middle" align="center">13.60</td>
<td valign="middle" align="center">7.10</td>
<td valign="middle" align="center">18.60</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center"><bold>No. 3 Sample</bold>
</td>
<td valign="middle" align="center"><bold>Length (cm)</bold>
</td>
<td valign="middle" align="center">5.70</td>
<td valign="middle" align="center">7.10</td>
<td valign="middle" align="center">6.10</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Weight (g)</bold>
</td>
<td valign="middle" align="center">16.00</td>
<td valign="middle" align="center">14.20</td>
<td valign="middle" align="center">13.10</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center"><bold>No. 4 Sample</bold>
</td>
<td valign="middle" align="center"><bold>Length (cm)</bold>
</td>
<td valign="middle" align="center">5.90</td>
<td valign="middle" align="center">8.10</td>
<td valign="middle" align="center">7.80</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Weight (g)</bold>
</td>
<td valign="middle" align="center">16.40</td>
<td valign="middle" align="center">12.30</td>
<td valign="middle" align="center">14.70</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_1_3">
<label>3.1.3</label>
<title>Experimental platform construction</title>
<p>Sea cucumbers are infected by immersion in this experiment. Three sets of experiments are set up in three cubic aquaria, each with a side length of 40 cm, and each aquarium holds four sea cucumbers. To realistically simulate the living environment of sea cucumbers without additionally affecting their behavior, the experiments are conducted during the daytime without light compensation. A camera platform is set up to record the behaviors of the sea cucumbers. The platform consists of three cameras, a digital video recorder, a switch, a display, and three fish tanks. The actual experimental scenario is shown in <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref>.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Actual experimental scenario.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g008.tif"/>
</fig>
</sec>
<sec id="s3_1_4">
<label>3.1.4</label>
<title>Image data acquisition and annotation</title>
<p>In this study, the dataset used for object detection is obtained by extracting frames from the video recorded in the experiment, with an image resolution of 618 &#xd7; 618 pixels. The experimental platform captures video data, whereas model training requires image data. In addition, because sea cucumbers move slowly, frames are extracted from the original video at 20 frames per second. After manual screening, a total of 1,000 sea cucumber images are obtained; an example image is shown in <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7A</bold></xref>. This study takes into account that light and noise variations in sea cucumber aquaculture environments can degrade image quality and reduce the detectability of sea cucumber objects. Therefore, to simulate the real aquaculture environment and improve the robustness and generalization ability of the detection model, several image processing methods are applied to the original images, such as adding noise, reducing brightness, and reducing contrast; an example of the processing effect is shown in <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7B</bold></xref>. This research uses the LabelImg annotation tool to annotate images. During labeling, the tightest rectangle enclosing each sea cucumber is used as the ground-truth box, and the coordinates of its center point are recorded. The labeling effect is shown in <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7C</bold></xref>. The input size of the model is set to 640 &#xd7; 640 pixels; the input size must be a multiple of 32.</p>
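<p>As an illustration of the augmentation step described above, the following minimal sketch (assuming OpenCV and NumPy; the file names are hypothetical) adds Gaussian noise and reduces brightness and contrast on a sampled frame. It is intended only as an example of the kind of processing applied, not as the exact pipeline used in this study.</p>
<preformat>
import cv2
import numpy as np

def add_gaussian_noise(img, sigma=15.0):
    # simulate sensor noise with zero-mean Gaussian perturbation
    noise = np.random.normal(0.0, sigma, img.shape)
    return np.clip(img.astype(np.float32) + noise, 0, 255).astype(np.uint8)

def reduce_brightness_contrast(img, alpha=0.7, beta=-30):
    # alpha below 1 reduces contrast; beta below 0 reduces brightness
    return cv2.convertScaleAbs(img, alpha=alpha, beta=beta)

img = cv2.imread("frame_0001.jpg")                 # one sampled video frame (hypothetical file name)
aug = reduce_brightness_contrast(add_gaussian_noise(img))
cv2.imwrite("frame_0001_aug.jpg", aug)
</preformat>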
</sec>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Experimental platform</title>
<p>The experiment is conducted on a platform equipped with an Intel(R) Core(TM) i7-6800K CPU @ 3.40 GHz (64 GB RAM) and an NVIDIA GeForce RTX 2080Ti 48G GPU. The software environment consists of CUDA 11.1.0, CUDNN 11.1, and Python 3.8.8. <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> shows the detailed training parameters.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Settings of training parameters.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Parameter</th>
<th valign="middle" align="center">Value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center"><bold>Weight decay</bold>
</td>
<td valign="middle" align="center">0.0005</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Momentum</bold>
</td>
<td valign="middle" align="center">0.95</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Epochs</bold>
</td>
<td valign="middle" align="center">300</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Initial learning rate</bold>
</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Image size</bold>
</td>
<td valign="middle" align="center">640 &#xd7; 640 pixels</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Thresh</bold>
</td>
<td valign="middle" align="center">0.5</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Batch size</bold>
</td>
<td valign="middle" align="center">8</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>Optimizer</bold>
</td>
<td valign="middle" align="center">SGD</td>
</tr>
</tbody>
</table>
</table-wrap>
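<p>For reference, the <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> settings can be expressed in a YOLOv5-style hyperparameter file. The sketch below (assuming the PyYAML package; the file name is hypothetical) is illustrative only, and the exact flag and key names differ between YOLOv5 releases.</p>
<preformat>
import yaml

# Hyperparameters from Table 2 written as YOLOv5-style overrides; a full
# hyperparameter file contains additional keys with default values.
hyp = {
    "lr0": 0.01,             # initial learning rate
    "momentum": 0.95,        # SGD momentum
    "weight_decay": 0.0005,  # optimizer weight decay
}
with open("hyp.sus_yolov5.yaml", "w") as f:
    yaml.safe_dump(hyp, f)

# The remaining Table 2 settings (300 epochs, batch size 8, 640 x 640 input,
# SGD optimizer, threshold 0.5) are usually passed on the training command line.
</preformat>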
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Evaluation indexes of model performance</title>
<p>The loss function in YOLOv5s is employed to measure the discrepancy between the model&#x2019;s predictions and the actual ground truth, and it comprises three losses: object loss, detection box loss and classification loss. To avoid overfitting and underfitting during the training process, the loss values are monitored for the training and validation sets and the best model is selected based on these values.</p>
<p>The Intersection over Union (IoU) metric is often employed to appraise the accuracy of the model&#x2019;s prediction. IoU gives the overlap ratio between the predicted detection boxes and the ground truth detection boxes. Using this metric, the values of precision (<italic>P</italic>) and recall (<italic>R</italic>) can be computed based on the model&#x2019;s predictions. The expressions for <italic>R</italic>, <italic>P</italic>, and loss are shown in <xref ref-type="disp-formula" rid="eq12"><bold>Equations 12</bold></xref>&#x2013;<xref ref-type="disp-formula" rid="eq14"><bold>14</bold></xref>.</p>
<disp-formula id="eq12">
<label>(12)</label>
<mml:math display="block" id="M12">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq13">
<label>(13)</label>
<mml:math display="block" id="M13">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq14">
<label>(14)</label>
<mml:math display="block" id="M14">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>l<sub>bbox</sub>
</italic>, <italic>l<sub>object</sub>
</italic> and <italic>l<sub>classification</sub>
</italic> represent the detection box loss, object loss, and classification loss, respectively. <italic>TP</italic> denotes the number of positive samples correctly predicted as positive, <italic>FP</italic> is the number of negative samples incorrectly predicted as positive, and <italic>FN</italic> is the number of positive samples incorrectly predicted as negative.</p>
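<p>As a worked example of <xref ref-type="disp-formula" rid="eq12"><bold>Equations 12</bold></xref> and <xref ref-type="disp-formula" rid="eq13"><bold>13</bold></xref>, the short sketch below computes precision and recall from hypothetical <italic>TP</italic>, <italic>FP</italic>, and <italic>FN</italic> counts.</p>
<preformat>
# Precision and recall from TP/FP/FN counts (Equations 12 and 13)
def precision(tp, fp):
    return tp / (tp + fp) if (tp + fp) else 0.0

def recall(tp, fn):
    return tp / (tp + fn) if (tp + fn) else 0.0

# Hypothetical counts: 96 correct detections, 4 false alarms, 4 missed objects
print(precision(96, 4), recall(96, 4))   # 0.96 0.96
</preformat>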
<p>The Average Precision (<italic>AP</italic>) serves as an evaluative metric for assessing the accuracy of an object detection model. <italic>AP</italic> is determined by calculating the area under the Precision-Recall (P-R) curve, which is generated by plotting precision and recall values across various threshold levels of the model&#x2019;s confidence scores. <italic>AP</italic> is the average value of precision at different recall levels, and it represents the ability of the model to correctly detect objects and avoid false positives. The <italic>mAP</italic> is the average of the <italic>AP</italic> values calculated for each category in the dataset. The <italic>mAP</italic> is a commonly used evaluation metric for object detection models, as it provides a single performance score that takes into account the detection accuracy for all object categories in the dataset. A higher <italic>mAP</italic> value indicates better model performance. In summary, <italic>AP</italic> and <italic>mAP</italic> are important evaluation metrics for object detection models, and they are computed based on the P-R curve. They are defined as shown in <xref ref-type="disp-formula" rid="eq15"><bold>Equations 15</bold></xref> and <xref ref-type="disp-formula" rid="eq16"><bold>16</bold></xref>:</p>
<disp-formula id="eq15">
<label>(15)</label>
<mml:math display="block" id="M15">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mn>1</mml:mn>
</mml:msubsup>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
<mml:mtext>d</mml:mtext>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq16">
<label>(16)</label>
<mml:math display="block" id="M16">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>R</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>R</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>(</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>Q<sub>R</sub>
</italic> stands for the number of categories.</p>
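<p>A minimal numerical sketch of <xref ref-type="disp-formula" rid="eq15"><bold>Equations 15</bold></xref> and <xref ref-type="disp-formula" rid="eq16"><bold>16</bold></xref> is given below, approximating the integral with the trapezoidal rule; the sample P-R points are hypothetical.</p>
<preformat>
import numpy as np

# AP as the area under the P-R curve (trapezoidal approximation of Equation 15)
def average_precision(recall_pts, precision_pts):
    order = np.argsort(recall_pts)
    r = np.asarray(recall_pts, dtype=float)[order]
    p = np.asarray(precision_pts, dtype=float)[order]
    return float(np.trapz(p, r))

# mAP as the mean of per-category AP values (Equation 16)
def mean_average_precision(ap_per_category):
    return sum(ap_per_category) / len(ap_per_category)

ap = average_precision([0.0, 0.5, 1.0], [1.0, 0.95, 0.80])
print(ap, mean_average_precision([ap]))   # for a single category, mAP equals AP
</preformat>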
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Experimental results of object detection</title>
<p>The training results of the models are shown in <xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref>, including the training loss curve, validation loss curve, <italic>mAP</italic>@0.5:0.95 curve, and <italic>mAP</italic>@0.5 curve. From <xref ref-type="fig" rid="f9"><bold>Figures&#xa0;9A, B</bold></xref>, one can see that the training and validation loss values of SUS-YOLOv5 gradually decrease as the number of iterations increases and finally stabilize at a value close to 0. In this process, no obvious overfitting occurs in the model. From <xref ref-type="fig" rid="f9"><bold>Figures&#xa0;9C, D</bold></xref>, one can see that the <italic>mAP</italic>@0.5 and <italic>mAP</italic>@0.5:0.95 indicators of SUS-YOLOv5 have obvious advantages over the other models, and the object detection performance is improved.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Training results of different models. <bold>(A)</bold> Training loss for the training set; <bold>(B)</bold> Verification loss for the validation set; <bold>(C)</bold> <italic>mAP</italic>@0.5; <bold>(D)</bold> <italic>mAP</italic>@0.5:0.95.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g009.tif"/>
</fig>
<p>In the experimental process, several methods for improving detection performance are tested and their results are compared in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Ablation experiments.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Results<break/>Models</th>
<th valign="middle" align="center">GFLOPS<break/>(G)</th>
<th valign="middle" align="center">Size of the <break/>model (MB)</th>
<th valign="middle" align="center"><italic>mAP@</italic>0.5<break/>(%)</th>
<th valign="middle" align="center"><italic>mAP@</italic>0.5:0.95<break/>(%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center"><bold>YOLOv5s+SENet</bold>
</td>
<td valign="middle" align="center">15.8</td>
<td valign="middle" align="center">13.8</td>
<td valign="middle" align="center">94.00</td>
<td valign="middle" align="center">81.10</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>YOLOv5s+SE-BiFPN</bold>
</td>
<td valign="middle" align="center">16.4</td>
<td valign="middle" align="center">14.0</td>
<td valign="middle" align="center">94.70</td>
<td valign="middle" align="center">81.40</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>YOLOv5s+Soft-NMS</bold>
</td>
<td valign="middle" align="center">15.8</td>
<td valign="middle" align="center">13.7</td>
<td valign="middle" align="center">95.00</td>
<td valign="middle" align="center">83.10</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>YOLOv5s+SENet+Soft-NMS</bold>
</td>
<td valign="middle" align="center">16.5</td>
<td valign="middle" align="center">14.1</td>
<td valign="middle" align="center">94.60</td>
<td valign="middle" align="center">81.30</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>YOLOv5s+Soft-NMS+SE</bold>
</td>
<td valign="middle" align="center">15.8</td>
<td valign="middle" align="center">13.8</td>
<td valign="middle" align="center">94.60</td>
<td valign="middle" align="center">82.90</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>YOLOv5s+Soft-NMS+SE-BiFPN</bold>
</td>
<td valign="middle" align="center">16.4</td>
<td valign="middle" align="center">14.0</td>
<td valign="middle" align="center">95.00</td>
<td valign="middle" align="center">83.00</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>YOLOv5s</bold>
</td>
<td valign="middle" align="center">15.8</td>
<td valign="middle" align="center">13.7</td>
<td valign="middle" align="center">92.10</td>
<td valign="middle" align="center">79.70</td>
</tr>
<tr>
<td valign="middle" align="center"><bold>SUS-YOLOv5</bold>
</td>
<td valign="middle" align="center">16.5</td>
<td valign="middle" align="center">14.1</td>
<td valign="middle" align="center">95.40</td>
<td valign="middle" align="center">83.80</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>, the proposed SUS-YOLOv5 has the highest accuracy whose <italic>mAP</italic>@0.5 is 0.954 and the <italic>mAP</italic>@0.5:0.95 is 0.838, which are 3.3% and 4.1% higher than the original YOLOv5s, respectively. Although the SUS-YOLOv5 model is more relatively complex than the original YOLOv5s, the size of the SUS-YOLOv5 model only increases by 0.4 MB compared with 14.1 MB of the YOLOv5s. The above results demonstrate that that SUS-YOLOv5 achieves a favorable balance between model size and detection accuracy.</p>
<p><xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref> shows the detection results of YOLOv5s and SUS-YOLOv5 on the sea cucumber dataset. To facilitate an understanding of the detection performance of the models, this research visualizes the 80 &#xd7; 80 detection layer of the models using heatmaps. In object detection, heatmaps are commonly used to highlight key areas with colors of varying intensity. Generally, the brighter the color of the heatmap, the more confident the model is in detecting the object.</p>
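<p>A possible way to produce such heatmaps, assuming a PyTorch model and OpenCV, is sketched below: a forward hook captures the intermediate feature map (e.g. 80 &#xd7; 80), its channel-wise mean is normalized, and the result is overlaid on the input image. This is an assumed approach given for illustration, not the authors&#x2019; exact visualization code.</p>
<preformat>
import cv2
import numpy as np
import torch

def feature_heatmap(model, layer, img_bgr):
    # grab the hooked layer's output during one forward pass
    feats = {}
    handle = layer.register_forward_hook(lambda m, i, o: feats.update(out=o.detach()))
    x = torch.from_numpy(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)).permute(2, 0, 1).float() / 255.0
    with torch.no_grad():
        model(x.unsqueeze(0))
    handle.remove()
    amap = feats["out"][0].mean(dim=0).relu().cpu().numpy()   # e.g. an 80 x 80 activation map
    amap = cv2.resize(amap / (amap.max() + 1e-6), img_bgr.shape[1::-1])
    color = cv2.applyColorMap(np.uint8(255 * amap), cv2.COLORMAP_JET)
    return cv2.addWeighted(img_bgr, 0.5, color, 0.5, 0)      # heatmap overlay
</preformat>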
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Comparison of the detection results. <bold>(A)</bold> Detection result of YOLOv5s; <bold>(B)</bold> Detection result of SUS-YOLOv5.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g010.tif"/>
</fig>
<p>From <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10A</bold></xref>, it can be seen that the YOLOv5s misses an object in the upper right corner, whose anchor box has a lower score. In contrast, from <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10B</bold></xref>, it can be seen that SUS-YOLOv5 successfully detects all sea cucumber objects and obtains higher confidence scores. In addition, the SUS-YOLOv5 has higher brightness than YOLOv5s in the object area, which indicates that SUS-YOLOv5 has a higher focus on the object area.</p>
<p>To further compare the performances of YOLOv5s and SUS-YOLOv5, the P-R curves are given in the training process of the sea cucumber object detection, shown in <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref>.</p>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>P-R curves of YOLOv5 and SUS-YOLOv5. <bold>(A)</bold> YOLOv5s; <bold>(B)</bold> SUS-YOLOv5.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g011.tif"/>
</fig>
<p>From <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref>, it can be observed that the SUS-YOLOv5 appears higher detection accuracy for sea cucumbers infected with Vibrio alginolyticus compared to YOLOv5s. Additionally, the improvements made on YOLOv5s are effective because the SENet attention mechanism, optimized Non-Maximum Suppression algorithm, and enhanced feature fusion capability improve the overall performance of the model.</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Comparison with other object detection models</title>
<p>To assess the performance of SUS-YOLOv5, this research compares it with traditional object detection models, including YOLOv3, YOLOv4, SSD, Faster-RCNN, EfficientDet, and YOLOv5s. The results of the comparison are shown in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>. From <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>, one can see that the <italic>mAP</italic>@0.5 of SUS-YOLOv5 reaches 95.40%, the highest among the one-stage object detection algorithms; it is not only higher than that of the other YOLO series algorithms but also 6.30% and 10.94% higher than the <italic>mAP</italic>@0.5 of SSD and EfficientDet, respectively. The accuracy of SUS-YOLOv5 can meet the requirements of sea cucumber detection.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Comparison of the performances of the seven models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Models</th>
<th valign="middle" align="center"><italic>P</italic>
<break/>(%)</th>
<th valign="middle" align="center"><italic>R</italic>
<break/>(%)</th>
<th valign="middle" align="center"><italic>mAP</italic>@0.5<break/>(%)</th>
<th valign="middle" align="center">Detection time<break/>(s)</th>
<th valign="middle" align="center">Size of the model<break/>(MB)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">SSD</td>
<td valign="middle" align="center">90.28</td>
<td valign="middle" align="center">86.10</td>
<td valign="middle" align="center">89.91</td>
<td valign="middle" align="center">0.020</td>
<td valign="middle" align="center">90.60</td>
</tr>
<tr>
<td valign="middle" align="center">Faster-RCNN</td>
<td valign="middle" align="center">63.73</td>
<td valign="middle" align="center">98.70</td>
<td valign="middle" align="center">95.46</td>
<td valign="middle" align="center">0.100</td>
<td valign="middle" align="center">108.00</td>
</tr>
<tr>
<td valign="middle" align="center">EfficientDet</td>
<td valign="middle" align="center">99.91</td>
<td valign="middle" align="center">77.83</td>
<td valign="middle" align="center">84.46</td>
<td valign="middle" align="center">0.197</td>
<td valign="middle" align="center">15.00</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv3</td>
<td valign="middle" align="center">97.69</td>
<td valign="middle" align="center">59.93</td>
<td valign="middle" align="center">93.83</td>
<td valign="middle" align="center">0.382</td>
<td valign="middle" align="center">235.00</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv4</td>
<td valign="middle" align="center">93.51</td>
<td valign="middle" align="center">61.25</td>
<td valign="middle" align="center">91.64</td>
<td valign="middle" align="center">0.236</td>
<td valign="middle" align="center">244.00</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv5s</td>
<td valign="middle" align="center">94.20</td>
<td valign="middle" align="center">94.20</td>
<td valign="middle" align="center">92.10</td>
<td valign="middle" align="center">0.014</td>
<td valign="middle" align="center">13.70</td>
</tr>
<tr>
<td valign="middle" align="center">SUS-YOLOv5</td>
<td valign="middle" align="center">96.00</td>
<td valign="middle" align="center">96.00</td>
<td valign="middle" align="center">95.40</td>
<td valign="middle" align="center">0.016</td>
<td valign="middle" align="center">14.10</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To make the performances of different detection algorithms more visual and clear, <xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref> shows the comparison results of performance for different detection algorithms in the form of a histogram.</p>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>Comparative evaluation of various detection algorithms&#x2019; performance.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g012.tif"/>
</fig>
<p>As seen in the above figure, the <italic>mAP</italic>@0.5 of Faster-RCNN is the highest among the seven models and is 0.06% higher than that of SUS-YOLOv5, but the model size of SUS-YOLOv5 is 86.94% smaller than that of Faster-RCNN. A large model size makes it difficult to deploy a model on embedded devices. As shown in <xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref>, the processing time of SUS-YOLOv5 is only 0.016 s for a single image, satisfying the real-time detection requirement for sea cucumber objects. On the other hand, the <italic>P</italic> and <italic>R</italic> of SUS-YOLOv5 both increase by 1.80% compared with those before the improvement. In summary, SUS-YOLOv5 achieves a good balance among detection accuracy, detection speed, and model size, which lays a good foundation for the following sea cucumber object tracking task.</p>
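<p>The arithmetic behind the quoted comparison figures is summarized in the short check below, using the model sizes from <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref> and the single-image processing time.</p>
<preformat>
# Quick arithmetic behind the comparison figures (values taken from Table 4)
faster_rcnn_size_mb, sus_yolov5_size_mb = 108.00, 14.10
size_reduction = (faster_rcnn_size_mb - sus_yolov5_size_mb) / faster_rcnn_size_mb
print(round(size_reduction * 100, 2))   # 86.94 (% smaller than Faster-RCNN)
print(round(1 / 0.016, 1))              # about 62.5 images processed per second
</preformat>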
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Object tracking and behavior analysis</title>
<p>This research analyzes the behavioral changes of sea cucumbers in the early stage of SUS by observing their behavior within one hour after immersion in three experimental groups (normal environment, low concentration of <italic>Vibrio alginolyticus</italic>, and high concentration of <italic>Vibrio alginolyticus</italic>). In the sea cucumber object tracking experiment, the MO-Tracking algorithm combines the ideas of cascade matching and object re-identification to effectively avoid the object ID loss problem and the ID switching problem caused by object occlusion. The ID identification results of the MO-Tracking algorithm are shown in <xref ref-type="fig" rid="f13"><bold>Figure&#xa0;13</bold></xref>.</p>
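<p>To make the association step concrete, the following self-contained sketch assigns IDs to detections by IoU matching with the Hungarian algorithm (assuming NumPy and SciPy). It is a deliberately simplified stand-in for illustration and omits the cascade matching and appearance re-identification used by MO-Tracking.</p>
<preformat>
import numpy as np
from scipy.optimize import linear_sum_assignment

def iou(a, b):
    # a, b: boxes as [x1, y1, x2, y2]
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area = lambda r: (r[2] - r[0]) * (r[3] - r[1])
    return inter / (area(a) + area(b) - inter + 1e-9)

def assign_ids(tracks, detections, iou_thresh=0.3):
    # tracks: dict mapping ID to last box; detections: list of boxes in the new frame
    ids = list(tracks)
    cost = np.array([[1.0 - iou(tracks[t], d) for d in detections] for t in ids],
                    dtype=float).reshape(len(ids), len(detections))
    rows, cols = linear_sum_assignment(cost) if cost.size else ([], [])
    next_id = max(ids, default=0) + 1
    matched = set()
    for r, c in zip(rows, cols):
        if 1.0 - cost[r, c] >= iou_thresh:   # accept the match only if overlap is large enough
            tracks[ids[r]] = detections[c]
            matched.add(c)
    for c, d in enumerate(detections):
        if c not in matched:                 # unmatched detections start new IDs
            tracks[next_id] = d
            next_id += 1
    return tracks
</preformat>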
<fig id="f13" position="float">
<label>Figure&#xa0;13</label>
<caption>
<p>ID identification results of the MO-Tracking algorithm.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g013.tif"/>
</fig>
<p>From <xref ref-type="fig" rid="f13"><bold>Figure&#xa0;13</bold></xref>, one can see that the MO-Tracking algorithm assigns an ID to each sea cucumber and accurately locks the identity of each sea cucumber based on the object&#x2019;s ID, while the corresponding coordinate information is continuously output.</p>
<p>After obtaining the coordinate information of the sea cucumber objects with the MO-Tracking algorithm, the pixel coordinates are transformed proportionally to obtain the actual coordinates of the sea cucumbers in the 40 &#xd7; 40 &#xd7; 40 cm fish tank. <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> shows the detailed statistical results of sea cucumber movement quantity in the three experiments.</p>
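<p>The coordinate conversion and movement quantity computation can be sketched as follows (assuming the frame width corresponds to the 40 cm tank width; both the calibration and the sample track are hypothetical).</p>
<preformat>
import math

def movement_quantity(pixel_track, tank_width_cm=40.0, frame_width_px=618):
    # proportional pixel-to-centimetre conversion (assumed calibration: the frame
    # width spans the 40 cm tank), then summed frame-to-frame displacement
    scale = tank_width_cm / frame_width_px
    pts = [(x * scale, y * scale) for x, y in pixel_track]
    return sum(math.dist(p, q) for p, q in zip(pts, pts[1:]))

# Hypothetical centre-point track of one sea cucumber over three frames
print(round(movement_quantity([(100, 100), (130, 120), (160, 150)]), 2))
</preformat>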
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Movement quantity statistics of sea cucumbers during the first hour.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Groups</th>
<th valign="middle" align="center">No.</th>
<th valign="middle" align="center">Movement quantity (cm)</th>
<th valign="middle" align="center">Mean movement quantity (cm)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center"><bold>Group 1</bold><break/><bold>(Normal environment)</bold>
</td>
<td valign="middle" align="center">ID 1</td>
<td valign="middle" align="center">19.28</td>
<td valign="middle" rowspan="4" align="center">33.17</td>
</tr>
<tr>
<td valign="middle" align="center">ID 2</td>
<td valign="middle" align="center">32.98</td>
</tr>
<tr>
<td valign="middle" align="center">ID 3</td>
<td valign="middle" align="center">63.67</td>
</tr>
<tr>
<td valign="middle" align="center">ID 4</td>
<td valign="middle" align="center">16.74</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center"><bold>Group 2</bold><break/><bold>(Low concentration)</bold>
</td>
<td valign="middle" align="center">ID 1</td>
<td valign="middle" align="center">15.95</td>
<td valign="middle" rowspan="4" align="center">56.48</td>
</tr>
<tr>
<td valign="middle" align="center">ID 2</td>
<td valign="middle" align="center">77.38</td>
</tr>
<tr>
<td valign="middle" align="center">ID 3</td>
<td valign="middle" align="center">81.21</td>
</tr>
<tr>
<td valign="middle" align="center">ID 4</td>
<td valign="middle" align="center">51.37</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center"><bold>Group 3</bold><break/><bold>(High concentration)</bold>
</td>
<td valign="middle" align="center">ID 1</td>
<td valign="middle" align="center">135.44</td>
<td valign="middle" rowspan="4" align="center">94.36</td>
</tr>
<tr>
<td valign="top" align="center">ID 2</td>
<td valign="top" align="center">55.14</td>
</tr>
<tr>
<td valign="top" align="center">ID 3</td>
<td valign="top" align="center">122.04</td>
</tr>
<tr>
<td valign="top" align="center">ID 4</td>
<td valign="top" align="center">64.80</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To more intuitively reflect the movement status of the sea cucumbers, their movement trajectories are plotted based on the obtained object coordinates in the three groups. <xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14</bold></xref> shows the results.</p>
<fig id="f14" position="float">
<label>Figure&#xa0;14</label>
<caption>
<p>Movement trajectories of sea cucumbers. <bold>(A)</bold> Normal environment; <bold>(B)</bold> Low concentration of <italic>Vibrio alginolyticus</italic>; <bold>(C)</bold> High concentration of <italic>Vibrio alginolyticus</italic>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1365155-g014.tif"/>
</fig>
<p>Combining <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> and <xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14</bold></xref>, we can conduct a detailed analysis of the movement status of the sea cucumbers within one hour after the start of the experiment. <xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14A</bold></xref> shows the trajectories of the sea cucumbers in group 1 (normal environment). From <xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14A</bold></xref>, it can be seen that, apart from ID 3, which shows obvious movement, the other three sea cucumbers stay near their original locations in the normal environment. According to <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>, the movement quantity of ID 3 is 63.67 cm, the mean movement quantity of the other three sea cucumbers is 22.99 cm, and the overall mean movement quantity is 33.17 cm. <xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14B</bold></xref> shows the trajectories of the sea cucumbers in group 2 (low concentration of <italic>Vibrio alginolyticus</italic>). ID 3 and ID 2 show the strongest reactions, with movement quantities of 81.21 cm and 77.38 cm, respectively, while ID 1 shows the weakest reaction. The overall mean movement quantity in group 2 is 56.48 cm, an increase of 23.31 cm compared with the normal environment. <xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14C</bold></xref> shows the behavior trajectories of the sea cucumbers in group 3 (high concentration of <italic>Vibrio alginolyticus</italic>). It can be seen that the movement quantity of the sea cucumbers increases significantly, with ID 1 showing the largest movement quantity of 135.44 cm. The movement quantity of ID 3 increases by 40.83 cm and 58.37 cm compared with that in group 2 and group 1, respectively. The mean movement quantity of the sea cucumbers in group 3 reaches 94.36 cm, which is obviously higher than those of the other two groups.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Movement quantity statistics of sea cucumbers during the first three hours of experiments.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center">Groups</th>
<th valign="top" rowspan="2" align="center">No.</th>
<th valign="top" colspan="4" align="center">Movement quantity (cm)</th>
</tr>
<tr>
<th valign="middle" align="center">A</th>
<th valign="middle" align="center">B</th>
<th valign="middle" align="center">C</th>
<th valign="middle" align="center">D</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" rowspan="4" align="center"><bold>Group 1</bold><break/><bold>(Normal environment)</bold>
</td>
<td valign="top" align="center">ID 1</td>
<td valign="top" align="center">19.28</td>
<td valign="top" align="center">8.35</td>
<td valign="top" align="center">8.59</td>
<td valign="top" align="center">12.07</td>
</tr>
<tr>
<td valign="top" align="center">ID 2</td>
<td valign="top" align="center">32.98</td>
<td valign="top" align="center">8.05</td>
<td valign="top" align="center">7.08</td>
<td valign="top" align="center">16.04</td>
</tr>
<tr>
<td valign="top" align="center">ID 3</td>
<td valign="top" align="center">63.67</td>
<td valign="top" align="center">25.67</td>
<td valign="top" align="center">17.39</td>
<td valign="top" align="center">35.58</td>
</tr>
<tr>
<td valign="top" align="center">ID 4</td>
<td valign="top" align="center">16.74</td>
<td valign="top" align="center">9.46</td>
<td valign="top" align="center">8.70</td>
<td valign="top" align="center">11.63</td>
</tr>
<tr>
<td valign="top" rowspan="4" align="center"><bold>Group 2</bold><break/><bold>(Low concentration)</bold>
</td>
<td valign="top" align="center">ID 1</td>
<td valign="top" align="center">15.95</td>
<td valign="top" align="center">3.50</td>
<td valign="top" align="center">11.84</td>
<td valign="top" align="center">10.43</td>
</tr>
<tr>
<td valign="top" align="center">ID 2</td>
<td valign="top" align="center">77.38</td>
<td valign="top" align="center">6.81</td>
<td valign="top" align="center">7.35</td>
<td valign="top" align="center">30.51</td>
</tr>
<tr>
<td valign="top" align="center">ID 3</td>
<td valign="top" align="center">81.21</td>
<td valign="top" align="center">59.08</td>
<td valign="top" align="center">94.14</td>
<td valign="top" align="center">78.14</td>
</tr>
<tr>
<td valign="top" align="center">ID 4</td>
<td valign="top" align="center">51.37</td>
<td valign="top" align="center">4.88</td>
<td valign="top" align="center">5.08</td>
<td valign="top" align="center">20.44</td>
</tr>
<tr>
<td valign="top" rowspan="4" align="center"><bold>Group 3</bold><break/><bold>(High concentration)</bold>
</td>
<td valign="top" align="center">ID 1</td>
<td valign="top" align="center">135.44</td>
<td valign="top" align="center">57.52</td>
<td valign="top" align="center">61.68</td>
<td valign="top" align="center">84.88</td>
</tr>
<tr>
<td valign="top" align="center">ID 2</td>
<td valign="top" align="center">55.14</td>
<td valign="top" align="center">9.04</td>
<td valign="top" align="center">12.93</td>
<td valign="top" align="center">25.70</td>
</tr>
<tr>
<td valign="top" align="center">ID 3</td>
<td valign="top" align="center">122.04</td>
<td valign="top" align="center">126.52</td>
<td valign="top" align="center">100.05</td>
<td valign="top" align="center">116.20</td>
</tr>
<tr>
<td valign="top" align="center">ID 4</td>
<td valign="top" align="center">64.80</td>
<td valign="top" align="center">26.59</td>
<td valign="top" align="center">56.48</td>
<td valign="top" align="center">49.29</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Throughout the experiment, sea cucumbers in the low concentration of <italic>Vibrio alginolyticus</italic> do not show a strong reaction or die; they only exhibit some symptoms such as head shaking and expelling their guts. To further explore the infection status of sea cucumbers at different concentrations of <italic>Vibrio alginolyticus</italic>, the mean movement quantity of the sea cucumbers is calculated for the first three hours of each experiment, as shown in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>.</p>
<p>In <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>, A, B, and C represent the movement quantity in the first, second, and third hours, respectively, and D represents the mean movement quantity in the first three hours. From <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>, it can be seen that ID 1 and ID 4 in group 2 do not appear significant differences in mean movement quantity compared to sea cucumbers in group 1. The occurrence of this phenomenon may be because of the concentration of <italic>Vibrio alginolyticus</italic> is low and the bacterial inoculation adopts the immersion infection method in the experiment, so that the sea cucumbers may be incompletely infected with <italic>Vibrio alginolyticus</italic>. When sea cucumbers are exposed to high concentrations of <italic>Vibrio alginolyticus</italic>, their reactions become more fidgety, and more symptoms occur, such as shaking their heads, blackening their tentacles, spitting out their guts, fully extending their trunk, and floating their heads. On the 5th day of these experiments, the high-concentration group of sea cucumbers begins to appear dead.</p>
<p>According to the analysis of the experimental results, sea cucumbers become irritable in the early stage of infection with <italic>Vibrio alginolyticus</italic>, and their movement quantity increases significantly compared with that in the normal environment. Therefore, in the early stage of infection with <italic>Vibrio alginolyticus</italic>, the increase in the movement quantity of sea cucumbers can be used as one of the characteristics for determining whether a sea cucumber is diseased.</p>
<p>According to the experimental results of sea cucumber infection with <italic>Vibrio alginolyticus</italic>, the infection level can be classified into three categories: low infection level (mean movement quantity from 25 cm/h up to, but not including, 45 cm/h), moderate infection level (mean movement quantity from 45 cm/h up to, but not including, 85 cm/h), and severe infection level (mean movement quantity of 85 cm/h or greater). The infection level can thus be judged from the movement quantity of the sea cucumbers, and an alarm corresponding to the infection level can be issued so that appropriate measures can be taken.</p>
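<p>A minimal sketch of this warning rule, using the thresholds stated above and the group means from <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>, is given below.</p>
<preformat>
def infection_level(mean_movement_cm_per_hour):
    # thresholds taken from the classification stated above
    if mean_movement_cm_per_hour >= 85:
        return "severe infection level"
    if mean_movement_cm_per_hour >= 45:
        return "moderate infection level"
    if mean_movement_cm_per_hour >= 25:
        return "low infection level"
    return "no warning"

# Mean movement quantities of the low- and high-concentration groups (Table 5)
print(infection_level(56.48), infection_level(94.36))
</preformat>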
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussions and future work</title>
<p>Since the body of sea cucumbers is very flexible (<xref ref-type="bibr" rid="B30">Ru et&#xa0;al., 2021</xref>), even if sea cucumbers appear to be stationary from the trajectory perspective, they still have a certain movement quantity. From the perspective of the entire movement process, the movement quantity is almost negligible (<xref ref-type="bibr" rid="B32">Sun et&#xa0;al., 2018</xref>).</p>
<p>We also observe that sea cucumbers suffering from SUS develop ulceration on their body surfaces during the experiments. The affected areas often appear white or yellow and often emit a foul odor. If this problem is not addressed promptly, the bacteria may continue to multiply and spread to other parts of the sea cucumber, further worsening the condition. Therefore, in future work we will use the above phenomenon as an additional criterion for discriminating the level of <italic>Vibrio alginolyticus</italic> infection and for giving timely warnings.</p>
<p>During the sample collection process, sea cucumbers with specifications as similar as possible are selected for the experiments to eliminate interference factors. However, the expansion and contraction of the sea cucumber&#x2019;s body make it difficult to obtain accurate trunk length and mass information. To ensure measurement accuracy, this research measures the length of the sea cucumbers after they have been kept stationary for a while. Additionally, sea cucumbers can absorb water, which causes their weight to fluctuate, but these fluctuations hardly affect the results of the experiments.</p>
<p>To deploy the model on embedded devices in the future, the next step is to further optimize the SUS-YOLOv5 model and compress it using lightweight techniques such as pruning and knowledge distillation (<xref ref-type="bibr" rid="B27">Poyatos et&#xa0;al., 2022</xref>). Depending on the detection objects, the genetic algorithm (<xref ref-type="bibr" rid="B35">Wang et&#xa0;al., 2022</xref>), the Bayesian optimization algorithm (<xref ref-type="bibr" rid="B13">Lan et&#xa0;al., 2022</xref>), and the particle swarm optimization algorithm (<xref ref-type="bibr" rid="B22">Liu et&#xa0;al., 2021</xref>) may be used to determine the hyperparameters of SUS-YOLOv5.</p>
<p>On the other hand, generative adversarial networks (<xref ref-type="bibr" rid="B46">Zhao et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B48">Zheng et&#xa0;al., 2022</xref>) can be used to expand the dataset, increase sample diversity, and improve the generalization ability of the model. Furthermore, the movement quantity of sea cucumbers can be monitored in real time over the long term as an early warning system for sea cucumber diseases. When the movement quantity of the sea cucumbers is abnormal, a timely warning will be given.</p>
<p>In fact, the proposed SUS-YOLOv5 can directly detect sea cucumbers and some other marine organisms considered in this paper. However, our proposed algorithm cannot be directly applied to all image processing and object detection scenarios, such as synthetic aperture sonar (SAS) (<xref ref-type="bibr" rid="B40">Yang, 2023</xref>; <xref ref-type="bibr" rid="B44">Zhang, 2023</xref>) and synthetic aperture radar (SAR) (<xref ref-type="bibr" rid="B26">Pinheiro et&#xa0;al., 2015</xref>). We believe that our algorithm, or further improved algorithms built on it, can be extended to these fields.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions</title>
<p>This research proposes a two-stage algorithm, MO-Tracking, for multi-object detection and tracking of sea cucumbers, which optimizes the NMS algorithm and designs a new feature fusion structure, thereby enhancing the non-maximum suppression and feature fusion abilities. With the proposed SUS-YOLOv5 object detection algorithm, the <italic>mAP</italic>@0.5 and <italic>mAP</italic>@0.5:0.95 reach 95.40% and 83.80%, respectively. In addition, through the object tracking experiments, this research finds that the behavior of sea cucumbers shows obvious abnormalities after infection with <italic>Vibrio alginolyticus</italic>, i.e., a significant increase in the mean movement quantity. The results obtained by the proposed method can be used as an important criterion for determining whether sea cucumbers suffer from disease. This research provides a method for the health monitoring of intensively cultivated sea cucumbers, which has practical significance for promoting the development of smart fisheries.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary materials, further inquiries can be directed to the corresponding author/s.</p>
</sec>
<sec id="s7" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The manuscript presents research on animals that do not require ethical approval for their study.</p>
</sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>FG: Investigation, Formal analysis, Writing &#x2013; original draft. KX: Software, Formal Analysis, Validation, Data curation, Writing &#x2013; original draft. PL: Writing &#x2013; review &amp; editing. JL: Methodology, Conceptualization, Supervision, Writing &#x2013; review &amp; editing. LJ: Data curation, Investigation, Writing &#x2013; original draft. JW: Investigation, Writing &#x2013; review &amp; editing. QL: Conceptualization, Investigation, Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s9" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. The research work is financially supported by the key project of Shandong Provincial Natural Science Foundation (ZR2020KC027), the project of National Natural Science Foundation of China (32073029), Shandong Province Modern Agricultural Technology System (SDAIT-22-19) and the Open Program of Key Laboratory of Cultivation and High-value Utilization of Marine Organisms in Fujian Province (2021fjscq08).</p>
</sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmad</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yahya</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ahmad</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Nazir</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Haq</surname> <given-names>A. U.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Object detection through modified YOLO neural network</article-title>. <source>Sci. Program.</source> <volume>2020</volume>, <elocation-id>8403262</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2020/8403262</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bao</surname> <given-names>Y. J.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>C. Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>J. L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Representation of freshwater aquaculture fish behavior in low dissolved oxygen condition based on 3D computer vision</article-title>. <source>Mod. Phys. Lett. B</source> <volume>32</volume> (<issue>34n36</issue>), <fpage>1840090</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1142/s0217984918400900</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bewley</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ge</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Ott</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ramos</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Upcroft</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Simple online and realtime tracking</article-title>,&#x201d; in <conf-name>2016 IEEE International Conference on Image Processing (ICIP)</conf-name>. <publisher-name>IEEE</publisher-name>, <fpage>3464</fpage>&#x2013;<lpage>3468</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICIP.2016.7533003</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bodla</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Chellappa</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Davis</surname> <given-names>L. S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Soft-NMS &#x2014; Improving object detection with one line of code</article-title>. <source>Proceedings of the IEEE international conference on computer vision</source>., <fpage>5562</fpage>&#x2013;<lpage>5570</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1704.04503</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>Food and Agriculture Organization of the United Nations</collab>
</person-group> (<year>2020</year>). <article-title>The State of World Fisheries and Aquaculture 2020</article-title>. Available at: <uri xlink:href="https://www.fao.org/documents/card/en?details=CC7493EN">https://www.fao.org/documents/card/en?details=CC7493EN</uri>.</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Fang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>A novel apple fruit detection and counting methodology based on deep learning and trunk tracking in modern orchard</article-title>. <source>Comput. Electron. Agric.</source> <volume>197</volume>, <elocation-id>107000</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2022.107000</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Spiral generative network for image extrapolation</article-title>. <source>Comput. Vision &#x2013; ECCV</source> <volume>2020</volume>, <fpage>701</fpage>&#x2013;<lpage>717</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-030-58529-7_41</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Qian</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Real-time railroad track components inspection based on the improved YOLOv4 framework</article-title>. <source>Automat Constr.</source> <volume>125</volume>, <elocation-id>103596</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.autcon.2021.103596</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Dai</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>a). <article-title>Relation networks for object detection</article-title>. <source>Proceedings of the IEEE conference on computer vision and pattern recognition</source>., <fpage>3588</fpage>&#x2013;<lpage>3597</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2018.00378</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2018</year>b). &#x201c;<article-title>Squeeze-and-excitation networks</article-title>,&#x201d; in <conf-name>2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>. <fpage>7132</fpage>&#x2013;<lpage>7141</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2018.00745</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Real-time detection of uneaten feed pellets in underwater images for aquaculture using an improved YOLO-V4 network</article-title>. <source>Comput. Electron. Agric.</source> <volume>185</volume>, <elocation-id>106135</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106135</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>An improved neural network based on SENet for sleep stage classification</article-title>. <source>IEEE J. Biomed. Health Inf.</source> <volume>26</volume>, <fpage>4948</fpage>&#x2013;<lpage>4956</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JBHI.2022.3157262</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lan</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Tomczak</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Roijers</surname> <given-names>D. M.</given-names>
</name>
<name>
<surname>Eiben</surname> <given-names>A. E.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Time efficiency in optimization with a bayesian-Evolutionary algorithm</article-title>. <source>Swarm Evol. Comput.</source> <volume>69</volume>, <elocation-id>100970</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.swevo.2021.100970</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Recent advances of deep learning algorithms for aquacultural machine vision systems with emphasis on fish</article-title>. <source>Artif. Intell. Rev.</source> <volume>55</volume>, <fpage>4077</fpage>&#x2013;<lpage>4116</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10462-021-10102-3</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Qi</surname> <given-names>Y. X.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>Z. Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z. B.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Y. T.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Assessment of the nutritional value of cultured sea cucumber Apostichopus japonicus</article-title>. <source>J. Aquat. Food Prod. Technol.</source> <volume>30</volume>, <fpage>868</fpage>&#x2013;<lpage>879</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/10498850.2021.1949769</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Deep learning for visual recognition and detection of aquatic animals: A review</article-title>. <source>Rev. Aquac.</source> <volume>15</volume>, <fpage>409</fpage>&#x2013;<lpage>433</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/raq.12726</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>L. X.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>L. M.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Z. Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Detection and analysis of behavior trajectory for sea cucumbers based on deep learning</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>18832</fpage>&#x2013;<lpage>18840</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/Access.6287639</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>T. Y.</given-names>
</name>
<name>
<surname>Doll&#xe1;r</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Hariharan</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Belongie</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Feature pyramid networks for object detection</article-title>,&#x201d; in <conf-name>2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>. <fpage>936</fpage>&#x2013;<lpage>944</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2017.106</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Meng</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zong</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Jellyfish recognition and density calculation based on image processing and deep learning</article-title>,&#x201d; in <conf-name>2020 Chinese Control And Decision Conference (CCDC)</conf-name>. <fpage>922</fpage>&#x2013;<lpage>927</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CCDC49329.2020.9164388</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Qi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Path aggregation network for instance segmentation</article-title>,&#x201d; in <conf-name>2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>. <fpage>8759</fpage>&#x2013;<lpage>8768</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2018.00913</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A quantitative detection algorithm based on improved faster R-CNN for marine benthos</article-title>. <source>Ecol. Inf.</source> <volume>61</volume>, <elocation-id>101228</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecoinf.2021.101228</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Alsaadi</surname> <given-names>F. E.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A PSO-based deep learning approach to classifying patients from emergency departments</article-title>. <source>Int. J. Mach. Learn. Cybern.</source> <volume>12</volume>, <fpage>1939</fpage>&#x2013;<lpage>1948</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s13042-021-01285-w</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lv</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Shao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Divergent proteomics response of Apostichopus japonicus suffering from skin ulceration syndrome and pathogen infection</article-title>. <source>Comp. Biochem. Physiol. Part D: Genomics Proteomics</source> <volume>30</volume>, <fpage>196</fpage>&#x2013;<lpage>205</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cbd.2019.03.003</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Real-time personalized health status prediction of lithium-ion batteries using deep transfer learning</article-title>. <source>Energy Environ. Sci.</source> <volume>15</volume>, <fpage>4083</fpage>&#x2013;<lpage>4094</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1039/D2EE01676A</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Park</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Oh</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>Fish disease diagnosis system based on image processing of pathogens&#x2019; Microscopic images</article-title>,&#x201d; in <conf-name>2007 Frontiers in the Convergence of Bioscience and Information Technologies</conf-name>. <publisher-name>IEEE</publisher-name>, <fpage>878</fpage>&#x2013;<lpage>883</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/FBIT.2007.157</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Pinheiro</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Rodriguez-Cassola</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Prats-Iraola</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Reigber</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Krieger</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Moreira</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Reconstruction of coherent pairs of synthetic aperture radar data acquired in interrupted mode</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>53</volume> (<issue>4</issue>), <fpage>1876</fpage>&#x2013;<lpage>1893</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2014.2350255</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Poyatos</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Molina</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Martinez</surname> <given-names>A. D.</given-names>
</name>
<name>
<surname>Del Ser</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Herrera</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>EvoPruneDeepTL: An evolutionary pruning model for transfer learning based deep neural networks</article-title>. <source>Neural Networks</source> <volume>158</volume>, <fpage>59</fpage>&#x2013;<lpage>82</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.neunet.2022.10.011</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>L.-Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.-Y.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>H.-N.</given-names>
</name>
<name>
<surname>Yi</surname> <given-names>T.-M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>An improved YOLOv5 method for large objects detection with multi-scale feature cross-layer fusion network</article-title>. <source>Image Vision Comput.</source> <volume>125</volume>, <elocation-id>104518</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.imavis.2022.104518</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Divvala</surname> <given-names>S. K.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R. B.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>You only look once: unified, real-time object detection</article-title>,&#x201d; in <conf-name>2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>. <fpage>779</fpage>&#x2013;<lpage>788</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2016.91</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ru</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Physiological traits of income breeding strategy in the sea cucumber Apostichopus japonicus</article-title>. <source>Aquaculture</source> <volume>539</volume>, <elocation-id>736646</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.aquaculture.2021.736646</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Nie</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Multi-object recognition method based on improved YOLOv2 model</article-title>. <source>Information Technology and Control</source> <volume>50</volume> (<issue>1</issue>), <fpage>13</fpage>&#x2013;<lpage>27</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5755/j01.itc.50.1.25094</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Effect of water temperature on diel feeding, locomotion behaviour and digestive physiology in the sea cucumber Apostichopus japonicus</article-title>. <source>J. Exp. Biol.</source> <volume>221</volume>, <elocation-id>jeb177451</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1242/jeb.177451</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Tan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Pang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Le</surname> <given-names>Q. V.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>EfficientDet: scalable and efficient object detection</article-title>,&#x201d; in <conf-name>2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>. <fpage>10778</fpage>&#x2013;<lpage>10787</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR42600.2020.01079</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Apple detection during different growth stages in orchards using the improved YOLO-V3 model</article-title>. <source>Comput. Electron. Agric.</source> <volume>157</volume>, <fpage>417</fpage>&#x2013;<lpage>426</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2019.01.012</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Apple stem/calyx real-time recognition using YOLO-v5 algorithm for fruit automatic loading system</article-title>. <source>Postharvest Biol. Technol.</source> <volume>185</volume>, <elocation-id>111808</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.postharvbio.2021.111808</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Response of bacterial community in sea cucumber Apostichopus japonicus intestine, surrounding water and sediment subjected to high-temperature stress</article-title>. <source>Aquaculture</source> <volume>535</volume>, <elocation-id>736353</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.aquaculture.2021.736353</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xuan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Intelligent recognition and behavior tracking of sea cucumber infected with <italic>Vibrio alginolyticus</italic> based on machine vision</article-title>. <source>Aquacult. Eng.</source> <volume>103</volume>, <elocation-id>102368</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.aquaeng.2023.102368</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Detection and classification of tea buds based on deep learning</article-title>. <source>Comput. Electron. Agric.</source> <volume>192</volume>, <elocation-id>106547</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106547</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Ge</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Analysis of behavior trajectory based on deep learning in ammonia environment for fish</article-title>. <source>Sensors</source> <volume>20</volume> (<issue>16</issue>), <fpage>4425</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s20164425</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>An imaging algorithm for high-resolution imaging sonar system</article-title>. <source>Multimed. Tools Appl.</source>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11042-023-16757-0</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Human motion based intent recognition using a deep dynamic neural model</article-title>. <source>Robotics Autonomous Syst.</source> <volume>71</volume>, <fpage>134</fpage>&#x2013;<lpage>149</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.robot.2015.01.001</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Real-time recognition and warning of mask wearing based on improved YOLOv5 R6.1</article-title>. <source>Int. J. Intell. Syst.</source> <volume>37</volume>, <fpage>9309</fpage>&#x2013;<lpage>9338</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/int.22994</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Underwater target detection based on Faster R-CNN and adversarial occlusion network</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>100</volume>, <elocation-id>104190</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.engappai.2021.104190</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>An efficient method for the simulation of multireceiver SAS raw signal</article-title>. <source>Multimed. Tools Appl.</source>, <fpage>1</fpage>&#x2013;<lpage>18</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11042-023-16992-5</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Deep learning for sea cucumber detection using stochastic gradient descent algorithm</article-title>. <source>Eur. J. Remote Sens.</source> <volume>53</volume>, <fpage>53</fpage>&#x2013;<lpage>62</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/22797254.2020.1715265</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Xin</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Unpaired underwater image synthesis with a disentangled representation for underwater depth map prediction</article-title>. <source>Sensors</source> <volume>21</volume>, <elocation-id>3268</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s21093268</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Adaptive neural consensus tracking for nonlinear multiagent systems using finite-time command filtered backstepping</article-title>. <source>IEEE Trans. Syst. Man Cybern.: Syst.</source> <volume>48</volume>, <fpage>2003</fpage>&#x2013;<lpage>2012</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TSMC.2017.2743696</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Not every sample is efficient: Analogical generative adversarial network for unpaired image-to-image translation</article-title>. <source>Neural Networks</source> <volume>148</volume>, <fpage>166</fpage>&#x2013;<lpage>175</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.neunet.2022.01.013</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Sulfated fucan/fucosylated chondroitin sulfate-dominated polysaccharide fraction from low-edible-value sea cucumber ameliorates type 2 diabetes in rats: New prospects for sea cucumber polysaccharide based-hypoglycemic functional food</article-title>. <source>Int. J. Biol. Macromol.</source> <volume>159</volume>, <fpage>34</fpage>&#x2013;<lpage>45</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ijbiomac.2020.05.043</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>
