<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mater.</journal-id>
<journal-title>Frontiers in Materials</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mater.</abbrev-journal-title>
<issn pub-type="epub">2296-8016</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1351938</article-id>
<article-id pub-id-type="doi">10.3389/fmats.2024.1351938</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Materials</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A modified intelligent real-time crack detection method for bridge based on improved target detection algorithm and transfer learning</article-title>
<alt-title alt-title-type="left-running-head">Yang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmats.2024.1351938">10.3389/fmats.2024.1351938</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Yang</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/1481940/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Long</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Yao</surname>
<given-names>Gang</given-names>
</name>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Du</surname>
<given-names>Hongbo</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Yuxiao</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wu</surname>
<given-names>Linjun</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
</contrib-group>
<aff>
<institution>Key Laboratory of New Technology for Construction of Cities in Mountain Area</institution>, <institution>School of Civil Engineering</institution>, <institution>Chongqing University</institution>, <addr-line>Chongqing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1868636/overview">Jialuo He</ext-link>, Washington State University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1948339/overview">Zidong Xu</ext-link>, Southeast University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2590779/overview">Xiaohua Li</ext-link>, Chongqing University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2600817/overview">Junlin Heng</ext-link>, University of Birmingham, United Kingdom</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Gang Yao, <email>yaogang@cqu.edu.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>05</day>
<month>02</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>11</volume>
<elocation-id>1351938</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>12</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>01</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Yang, Li, Yao, Du, Chen and Wu.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Yang, Li, Yao, Du, Chen and Wu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The combination of UAV camera and intelligent algorithm is a promising method for non-contact bridge crack detection. In this paper, an inspection tool based on UAV Image Acquisition Technology (UAVIAT) and Improved Intelligent Target Detection Technology (IITDT) called Improved Intelligent Real-Time Crack Detection Method for Bridges (IIRTCDMB) is proposed for efficient crack detection. The contributions of this paper are (1) The Squeeze-Excitement (SE) attention module is integrated into the target detection algorithm - You Only Look Once version 7 (YOLOv7) model to improve the learning ability of the feature channel. A Focal-efficient intersection over union (Focal-EIoU) loss function is also introduced to improve the regression accuracy of the model. As a result, a new crack image detection algorithm, YOLOv7-CD, is proposed. (2) A training process based on two-stage transfer learning (TSTL) is established, and hyper-parameter optimization of YOLOv7-CD is carried out. The feasibility and excellent performance of the proposed method are verified by applying it on the Cuntan Yangtze River Bridge. The results show that the average precision (AP) of the YOLOv7-CD model is improved by 3.19% compared with the original YOLOv7 model. After TSTL and hyperparameter optimization, the AP of the YOLOv7-CD model for bridge crack detection reaches 98.01%, which is higher than that of the popular target detection models. The IIRTCDMB proposed in this paper can acquire bridge surface images more safely and efficiently, and provide inspectors with more accurate structural crack information with lower computational and hardware requirements, which can provide technical support for the assessment of structural safety conditions and the formulation of maintenance programs.</p>
</abstract>
<kwd-group>
<kwd>bridge crack detection</kwd>
<kwd>target detection algorithm</kwd>
<kwd>transfer learning</kwd>
<kwd>hyperparameter optimization</kwd>
<kwd>unmanned aerial vehicle</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Structural Materials</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Crack disease is one of the most common diseases in concrete bridges, which has an essential impact on bridges&#x2019; structural stability and traffic capacity. Therefore, visually inspecting structures is important for bridge operation and maintenance (<xref ref-type="bibr" rid="B7">Ge et al., 2020</xref>; <xref ref-type="bibr" rid="B24">Saidin et al., 2022</xref>). With the increase in the service life of bridges, the demand for crack detection is also increasing (<xref ref-type="bibr" rid="B21">Mohan and Poobal, 2018</xref>). Currently, manual inspection and bridge inspection vehicle inspection are the main methods for bridge inspection. Manual detection has high risk, strong subjectivity, and long periods. At the same time, the bridge inspection vehicle method is not conducive to traffic safety, has limited applicable conditions, and is expensive (<xref ref-type="bibr" rid="B30">Tomiczek et al., 2019</xref>).</p>
<p>Recently, the rapid development of unmanned aerial vehicles (UAVs) has made collecting images of bridge conditions trivial (<xref ref-type="bibr" rid="B22">Perry et al., 2020</xref>). Its high-resolution camera can quickly and safely obtain high-definition images, which serves as an important basis for subsequent analysis of bridge defects. It has been confirmed that the UAS-based bridge inspection is faster and more objective than the existing technology (<xref ref-type="bibr" rid="B15">Kim et al., 2022</xref>). Unmanned aerial vehicles (UAVs) are now a viable option for augmenting bridge inspections (<xref ref-type="bibr" rid="B13">Khaloo et al., 2018</xref>; <xref ref-type="bibr" rid="B16">Li et al., 2024</xref>). Using UAVs for bridge appearance inspection not only has high accuracy and low cost (<xref ref-type="bibr" rid="B26">Seo et al., 2018</xref>) but can also perform all-around inspections (<xref ref-type="bibr" rid="B25">Sanchez-Cuevas et al., 2019</xref>; <xref ref-type="bibr" rid="B32">Wang et al., 2020</xref>; <xref ref-type="bibr" rid="B43">Yao et al., 2023a</xref>) and reduce the danger in the inspection work (<xref ref-type="bibr" rid="B19">Liu et al., 2020</xref>; <xref ref-type="bibr" rid="B12">Kao et al., 2022</xref>).</p>
<p>Currently, there are some things that could be improved in using traditional digital image processing methods to detect cracks. In the crack identification and extraction process, the main methods are the gray threshold segmentation method based on the gray difference between the crack area and the background (<xref ref-type="bibr" rid="B39">Xuhang et al., 2011</xref>) and the Canny iterative method based on the linear features of cracks (<xref ref-type="bibr" rid="B36">Xu et al., 2013</xref>). The gray threshold segmentation method can only give the general position of the cracks, and the positioning accuracy is insufficient. The Canny iterative method is susceptible to background clutter, resulting in a high rate of false detection and missed detection. With the evolution of machine learning technology, some scholars have implemented crack detection using crack multi-features combined with three statistical classification methods, namely, Support Vector Machine, AdaBoost, and Random Forest, respectively (<xref ref-type="bibr" rid="B23">Prasanna et al., 2016</xref>). However, these methods require the manual design of crack features, resulting in poor adaptability and scalability of the algorithm. The tensor voting algorithm (<xref ref-type="bibr" rid="B8">Guan et al., 2015</xref>) that enhances crack features by utilizing the linear difference between cracks and background noise has good results in detecting linear cracks, but it is not sensitive to complex cracks and may miss width information.</p>
<p>Deep learning has shown excellent performance in image recognition (<xref ref-type="bibr" rid="B14">Kim et al., 2018</xref>; <xref ref-type="bibr" rid="B34">Wei et al., 2019</xref>; <xref ref-type="bibr" rid="B2">Chen et al., 2020</xref>; <xref ref-type="bibr" rid="B28">Sun et al., 2021</xref>; <xref ref-type="bibr" rid="B41">Yang et al., 2021</xref>; <xref ref-type="bibr" rid="B1">Chen et al., 2023</xref>). Crack detection algorithms based on deep learning can be divided into two categories. The first category uses object detection networks for crack localization and identification, while the other uses semantic segmentation models for pixel-level recognition of crack images. Research has shown that the You Only Look Once (YOLO) series algorithm performs outstandingly among many object detection networks (<xref ref-type="bibr" rid="B4">Du et al., 2021</xref>). In the YOLO model, integrating the attention module or improving the feature extraction network can enhance the sensitivity of the model to the target features (<xref ref-type="bibr" rid="B48">Yao et al., 2019</xref>; <xref ref-type="bibr" rid="B42">Yang et al., 2022a</xref>; <xref ref-type="bibr" rid="B18">Liu et al., 2022</xref>; <xref ref-type="bibr" rid="B52">Zhang et al., 2023a</xref>; <xref ref-type="bibr" rid="B11">Kao et al., 2023</xref>); combining the depth-separable convolution or replacing the lightweight feature extraction network, a lightweight target detection network for real-time detection of cracks on the structure surface can be obtained (<xref ref-type="bibr" rid="B53">Zhang et al., 2020a</xref>; <xref ref-type="bibr" rid="B47">Yao et al., 2021a</xref>; <xref ref-type="bibr" rid="B40">Yang et al., 2022b</xref>; <xref ref-type="bibr" rid="B50">Zhang et al., 2022</xref>; <xref ref-type="bibr" rid="B51">Zhang et al., 2023b</xref>; <xref ref-type="bibr" rid="B10">Jin et al., 2023</xref>); introducing the focal loss function or transfer learning can improve the recognition accuracy of the 
model. Deep learning has been gradually applied to bridge crack detection (<xref ref-type="bibr" rid="B49">Zhang et al., 2020b</xref>; <xref ref-type="bibr" rid="B46">Yao et al., 2021b</xref>; <xref ref-type="bibr" rid="B29">Teng et al., 2022</xref>). However, there are still problems, such as difficulty in obtaining crack images, excessive training parameters of network models, long inference time, and low detection accuracy.</p>
<p>In order to obtain bridge surface crack images more efficiently and improve the model&#x2019;s detection accuracy of cracks, this study proposed an improved intelligent real-time crack detection method for bridges (IIRTCDMB) based on UAVIAT and improved intelligent target detection technology (IITDT). First, the appearance image of the bridge to be detected was obtained through UAVs. Then, the SE attention module was introduced into the YOLOv7 model to enhance the feature extraction ability of cracks, and the Focal-efficient intersection over union (Focal-EIoU) loss function was used to balance positive and negative samples and accelerate loss convergence. The improved model was defined as YOLOv7-CD. To further improve the average precision (AP), the improved model underwent two-stage transfer learning (TSTL) training. The initial training was performed with the COCO2017 dataset to obtain the initial training weights. Then, the publicly available CRACK500 dataset (<xref ref-type="bibr" rid="B6">Eisenbach et al., 2017</xref>) was used for pre-training with different hyperparameters to obtain pre-training weights. Finally, the training is performed on the bridge crack dataset (QL_CRACK dataset), and the error is reduced by adjusting the values of hyperparameters to make the model more suitable for bridge crack detection.</p>
</sec>
<sec id="s2">
<title>2 Methodologies</title>
<p>The IIRTCDMB proposed in this article mainly includes high-definition image acquisition by UAVs and automatic localization and recognition of bridge cracks based on the YOLOv7-CD model. The specific process is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The diagram of the improved intelligent real-time crack detection method for bridges.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g001.tif"/>
</fig>
<p>In the UAVIAT, bridge information review, site risk assessment, flight plan development, and daily environmental monitoring will be conducted. Then, the UAV will be selected, and parameter settings such as shooting distance and flight route will be completed. After camera calibration, the bridge structure&#x2019;s appearance will be captured, and an automatic naming program will be written to assign position and number to each image. The image quality will be judged based on the proposed image quality evaluation function. The images with qualified quality will be stored in the database according to the preset cycle, and finally, the bridge crack dataset will be obtained. The YOLOv7-CD model will be trained with TSTL and hyperparameter optimization. Finally, cracks will be detected in the images, and their corresponding locations in the actual bridge will be obtained.</p>
<sec id="s2-1">
<title>2.1 Image acquisition method</title>
<sec id="s2-1-1">
<title>2.1.1 UAV selection</title>
<p>Currently, the UAVs mainly used for crack detection include DJI Mavic 2 Pro, DJI M210-RTK, DJI Mini 2, and DJI Mavic Air 2 (<xref ref-type="bibr" rid="B38">Xu et al., 2023a</xref>). Due to the large span of bridges inspected by UAVs, the main parameters to consider are hovering time and hovering accuracy. Additionally, since it involves storing appearance images of large-span bridges, there is also a high requirement for memory. Through an analysis of the parameters of the four UAVs, DJI Mavic Air 2 is the best option. Its hover time is 33 min, hover accuracy is &#xb1;0.1, and memory size is 8 GB. To ensure consistent experimental conditions, the distance between the UAV and the target surface is fixed at 5.0 m, and the normal direction of the lens is perpendicular to the target surface. The images will be taken under clear weather conditions and natural lighting. To ensure sufficient image data, the UAV will cover the entire bridge deck and tower surface, with a 50% overlap in the images taken.</p>
</sec>
<sec id="s2-1-2">
<title>2.1.2 Camera calibration</title>
<p>In the process of picture-taking and crack detection of concrete bridges, a geometric model of camera imaging is required to determine the interrelationship between the three-dimensional geometric position of a point on the surface of the bridge structure and its corresponding point in the image. In the UAVIAT, the calibration of camera parameters is the key link, and its calculation process and calibration results directly affect the authenticity of the crack detection results. The conversion model of the ground coordinate system to the pixel coordinate system is introduced below.<list list-type="simple">
<list-item>
<p>1) Transformation from ground coordinate system to camera coordinate system.</p>
</list-item>
</list>
</p>
<p>In order to accurately describe the motion trajectory of the UAV and obtain its position information, a transformation model from the ground coordinate system <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> to the camera coordinate system <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> has been established. The angles &#x3c3;, &#x3bc;, and &#x3b5; represent the angles of camera rotation around the X<sub>G</sub>, Y<sub>G</sub>, and Z<sub>G</sub> axes, respectively (<xref ref-type="fig" rid="F2">Figure 2</xref>). Then, the rotation matrix R from the ground coordinate system to the camera coordinate system can be obtained, as shown in Eq. <xref ref-type="disp-formula" rid="e1">1</xref>.<disp-formula id="e1">
<mml:math id="m3">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>cos</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>sin</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>sin</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>cos</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>cos</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>sin</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>sin</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>cos</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>cos</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>sin</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>sin</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>cos</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>UAV motion state description parameters: <bold>(A)</bold> Rotation around XG axis; <bold>(B)</bold> Rotation around YG axis; <bold>(C)</bold> Rotation around ZG axis.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g002.tif"/>
</fig>
<p>In addition to the rotation transformation, there is also a translation transformation between the UAV and the ground. Therefore, the coordinate transformation matrix from the ground coordinate system to the camera coordinate system is given by Eq. <xref ref-type="disp-formula" rid="e2">2</xref>.<disp-formula id="e2">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mi>R</mml:mi>
</mml:mtd>
<mml:mtd>
<mml:mi>T</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mi>O</mml:mi>
</mml:mtd>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf3">
<mml:math id="m5">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>x</mml:mi>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the translation transformation matrix. <inline-formula id="inf4">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>x</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf5">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf6">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the displacement of the UAV in the X<sub>G</sub>, Y<sub>G</sub>, and Z<sub>G</sub> directions, respectively.<list list-type="simple">
<list-item>
<p>2) Transformation from camera coordinate system to image coordinate system.</p>
</list-item>
</list>
</p>
<p>As shown in <xref ref-type="fig" rid="F3">Figure 3A</xref>, the transformation from the camera coordinate system to the image coordinate system <inline-formula id="inf7">
<mml:math id="m9">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>Z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> follows the pinhole imaging principle. The Z<sub>C</sub> axis of the camera coordinate system is on the same line as the <italic>Z</italic>-axis of the image coordinate system. P is a point in space, P<sub>1</sub> is its projection point in the plane X<sub>C</sub>O<sub>C</sub>Y<sub>C</sub>, P<sub>2</sub> is the imaging point of P in the XOY plane of the image coordinate system, f is the focal length, and z is the distance from point P to point P<sub>1</sub>. The conversion matrix F from camera coordinates to image coordinates is given by Eq. <xref ref-type="disp-formula" rid="e3">3</xref>.<disp-formula id="e3">
<mml:math id="m10">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<list list-type="simple">
<list-item>
<p>3) Transformation from image coordinate system to pixel coordinate system.</p>
</list-item>
</list>
</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Coordinate transformation: <bold>(A)</bold> Transformation from camera coordinate system to image coordinate system; <bold>(B)</bold> Transformation from image coordinate system to pixel coordinate system.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g003.tif"/>
</fig>
<p>The pixel coordinate system reflects the arrangement of pixels in the CMOS chip of the camera, as shown in <xref ref-type="fig" rid="F3">Figure 3B</xref>. The image coordinate system and the pixel coordinate system <inline-formula id="inf8">
<mml:math id="m11">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> are in a translation relationship, and the transformation matrix D between them is shown in Eq. <xref ref-type="disp-formula" rid="e4">4</xref>.<disp-formula id="e4">
<mml:math id="m12">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where <inline-formula id="inf9">
<mml:math id="m13">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> is the coordinate of the image coordinate system origin in the pixel coordinate system; <inline-formula id="inf10">
<mml:math id="m14">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf11">
<mml:math id="m15">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represent the physical dimensions of the pixel in the <italic>X</italic> and <italic>Y</italic> directions, respectively.</p>
<p>In summary, the formula for transforming the ground coordinate system to the pixel coordinate system is shown in Eq. <xref ref-type="disp-formula" rid="e5">5</xref>. Eq. <xref ref-type="disp-formula" rid="e6">6</xref> represents the calculation of the intrinsic parameter matrix of the camera.<disp-formula id="e5">
<mml:math id="m16">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mi>u</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mi>v</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
<disp-formula id="e6">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>/</mml:mo>
<mml:mi>z</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>where <inline-formula id="inf12">
<mml:math id="m18">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the homogeneous coordinate in the pixel coordinate system, and <inline-formula id="inf13">
<mml:math id="m19">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the homogeneous coordinate in the ground coordinate system. M<sub>1</sub> is the external parameter matrix of the camera, and M<sub>2</sub> is the internal parameter matrix of the camera.</p>
</sec>
<sec id="s2-1-3">
<title>2.1.3 Image quality assessment</title>
<p>In this article, entropy and sharpness are utilized as two image quality parameters to assess the quality of the images captured from the UAV (<xref ref-type="bibr" rid="B5">Duque et al., 2018</xref>). The formula for judging the image quality is given by Eq. <xref ref-type="disp-formula" rid="e7">7</xref> and Eq. <xref ref-type="disp-formula" rid="e8">8</xref>. The criteria for judging the image quality are:<list list-type="simple">
<list-item>
<p>1) The image sharpness is greater than the average value of sharpness.</p>
</list-item>
<list-item>
<p>2) The image sharpness is less than the average value of sharpness, but the image entropy is less than the average value of entropy.</p>
</list-item>
</list>
</p>
<p>The image satisfying any of these points is considered a qualified image.<disp-formula id="e7">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>8</mml:mn>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
<disp-formula id="e8">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>8</mml:mn>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mn>255</mml:mn>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:msub>
<mml:mi>log</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where N is the total number of captured bridge images, <inline-formula id="inf14">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the sharpness of the <italic>k</italic>th image, <inline-formula id="inf15">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the total number of pixels in the image, <inline-formula id="inf16">
<mml:math id="m24">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the amplitude of the grayscale variation, and <inline-formula id="inf17">
<mml:math id="m25">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the distance increment between pixels. <inline-formula id="inf18">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the gradient vector between pixels, which is calculated based on the pixel values of the eight neighboring pixels for each pixel. <inline-formula id="inf19">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the occurrence probability of the gray value <inline-formula id="inf20">
<mml:math id="m28">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in the image, which is obtained from the grayscale histogram.</p>
</sec>
</sec>
<sec id="s2-2">
<title>2.2 Crack detection</title>
<sec id="s2-2-1">
<title>2.2.1 YOLOv7-based crack detection</title>
<p>The bridge surface images that meet the quality requirements are input into the crack detection model YOLOv7. In the beginning, the input image enters the main feature extraction network Backbone, which performs crack feature extraction by convolutional normalization and activation function. In the feature extraction process, the image will be compressed in height and width first, then the channel expansion will be performed, and finally, the three effective crack feature layers will be formed. Then, the three effective crack feature layers obtained at Backbone will be fused by the FPN framework, which aims to combine the crack feature information at different scales for enhanced feature extraction of the image. FPN performs up-sampling and down-sampling on the crack features to achieve the fusion of crack features. After passing through the Backbone and FPN, three enhanced effective crack feature layers will be obtained. Each crack feature layer has a width, height, and number of channels. At this point, the crack feature map can be viewed as a collection of feature points, with three prior boxes at each feature point, each of which has the same number of crack features as the number of channels. Eventually, the RepConv structure is introduced in the Head part to make the complex residual structure equivalent to a normal 3 &#xd7; 3 convolution. This can reduce the complexity of the network while ensuring the same prediction performance.</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Attention module</title>
<p>The task of locating and identifying cracks focuses on details such as background color and crack position. Therefore, in order to enhance the model&#x2019;s perception of crack features and improve the detection effect on small targets, three SE channel attention modules were added to the YOLOv7 model. The SE attention module has three steps:<list list-type="simple">
<list-item>
<p>1) Using adaptive global average pooling to compress the length and width of the crack feature layer, leaving only the information of the channel dimension C.</p>
</list-item>
<list-item>
<p>2) Continuously using two fully connected layers to perform self-attention on the channel information and obtaining a feature map with a dimension of 1&#x2a;1&#x2a;C.</p>
</list-item>
<list-item>
<p>3) Performing activation by channel-wise multiplication with weight coefficients on the feature map with channel attention (1&#x2a;1&#x2a;C) and the original input feature map (H&#x2a;W&#x2a;C), and finally outputting a feature map with channel attention.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Loss functions</title>
<p>Neural network model training is the process of optimizing the parameters in the network and reducing the losses using a backpropagation algorithm. Loss is the penalty for inaccurate predictions during the training process and describes the difference between the model&#x2019;s predicted results and the actual results. Eq. <xref ref-type="disp-formula" rid="e9">9</xref> is the formula for calculating the loss function of YOLOv7 during the training process.<disp-formula id="e9">
<mml:math id="m29">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>where <inline-formula id="inf21">
<mml:math id="m30">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the total loss of the model, <inline-formula id="inf22">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the bounding box regression loss used to measure the deviation between the predicted and ground truth crack boxes, <inline-formula id="inf23">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the confidence loss used to measure the accuracy of crack localization, and <inline-formula id="inf24">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the classification loss used to measure the accuracy of predicting the presence of cracks. The calculation formulas for <inline-formula id="inf25">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf26">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are shown in Eq. <xref ref-type="disp-formula" rid="e10">10</xref> and Eq. <xref ref-type="disp-formula" rid="e11">11</xref>.<disp-formula id="e10">
<mml:math id="m36">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mi>B</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msubsup>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mover accent="true">
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mspace width="2.5em"/>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3bb;</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mi>B</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msubsup>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mover accent="true">
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
<disp-formula id="e11">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msubsup>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mover accent="true">
<mml:msubsup>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:msubsup>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>where <inline-formula id="inf27">
<mml:math id="m38">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf28">
<mml:math id="m39">
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are the priori box and the feature map scale. <inline-formula id="inf29">
<mml:math id="m40">
<mml:mrow>
<mml:msubsup>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf30">
<mml:math id="m41">
<mml:mrow>
<mml:msubsup>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denote crack and no crack in the <italic>j</italic>th prior box of the <italic>i</italic>th grid. <inline-formula id="inf31">
<mml:math id="m42">
<mml:mrow>
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf32">
<mml:math id="m43">
<mml:mrow>
<mml:mover accent="true">
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> denote the confidence of the predicted and labeled box. <inline-formula id="inf33">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bb;</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the weight coefficient. <inline-formula id="inf34">
<mml:math id="m45">
<mml:mrow>
<mml:msubsup>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf35">
<mml:math id="m46">
<mml:mrow>
<mml:mover accent="true">
<mml:msubsup>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> denote the classification probability of the predicted and labeled box.</p>
<p>When calculating the regression loss, the EIoU was used instead of complete intersection over union (CIoU) to consider the effects of overlapping area, centroid distance, and aspect ratio of the target and prediction boxes simultaneously. The EIoU takes into account the width and height loss, which minimizes the difference between the width and height of the target and prediction boxes, thereby accelerating convergence and improving regression accuracy. Since cracks are small and occupy a small proportion of the background, the focal loss function is introduced to balance the proportion of foreground and background data samples (<xref ref-type="bibr" rid="B17">Lin et al., 2020</xref>; <xref ref-type="bibr" rid="B31">Wang et al., 2023a</xref>). Finally, the Focal-EIoU loss function is obtained, as shown in Eq. <xref ref-type="disp-formula" rid="e12">12</xref> and Eq. <xref ref-type="disp-formula" rid="e13">13</xref>.<disp-formula id="e12">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>w</mml:mi>
</mml:msub>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>w</mml:mi>
</mml:msub>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
<disp-formula id="e13">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:msup>
<mml:mi>U</mml:mi>
<mml:mi>&#x3b3;</mml:mi>
</mml:msup>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>where <inline-formula id="inf36">
<mml:math id="m49">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the ratio of the intersection of the areas of the target box and the predicted box to their union. <inline-formula id="inf37">
<mml:math id="m50">
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf38">
<mml:math id="m51">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf39">
<mml:math id="m52">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are the coordinates of the center point, width, and height of the predicted box, respectively. <inline-formula id="inf40">
<mml:math id="m53">
<mml:mrow>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf41">
<mml:math id="m54">
<mml:mrow>
<mml:msup>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf42">
<mml:math id="m55">
<mml:mrow>
<mml:msup>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are the coordinates of the center point, width, and height of the target box, respectively. <inline-formula id="inf43">
<mml:math id="m56">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the squared Euclidean distance between the center points of the predicted and target boxes. <inline-formula id="inf44">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>w</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf45">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the width and height of the smallest enclosing box covering the target and prediction boxes. <inline-formula id="inf46">
<mml:math id="m59">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a parameter controlling the degree of outlier suppression.</p>
</sec>
<sec id="s2-2-4">
<title>2.2.4 YOLOv7-CD model</title>
<p>The YOLOv7 model, which integrates the SE attention module and the Focal-EIoU Loss, is named YOLOv7-CD, as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. The red part indicates the added SE attention module, and the red arrow represents the changed computation path.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>YOLOv7-CD model architecture.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g004.tif"/>
</fig>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 Two-stage transfer learning</title>
<p>The hyperparameters for extracting different features in the same neural network model have good interoperability. In order to improve the training efficiency and prediction accuracy of the model, TSTL is used in this study. The transfer learning process is shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. In Stage 1, the initial training weights are obtained by initializing the model parameters on the COCO2017 dataset. In Stage 2, the model backbone network is frozen, and the batch normalization layer is not updated during feature transfer to reduce the model error and to ensure the transfer effect. The pre-trained model parameters are adjusted by training on the publicly available CRACK500 dataset, which is a pavement crack dataset suitable for target detection. In Stage 3, freeze training and then thaw training is performed on the QL_CRACK dataset.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Two-stage transfer learning training diagram.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g005.tif"/>
</fig>
</sec>
</sec>
<sec id="s3">
<title>3 Experiments</title>
<sec id="s3-1">
<title>3.1 Image acquisition</title>
<p>The selected research object of this article is the Cuntan Yangtze River Bridge, which starts from Huangjuewan Interchange in the south, crosses the Yangtze River, and ends at Happy Valley Interchange in the north. The total length of the line is 1.6 km, and the main bridge is 880 m long and 42 m wide.</p>
<p>The DJ Mavic Air 2 UAV was used for bridge crack image acquisition in the experiment, which has high vertical hovering accuracy and horizontal hovering accuracy (<xref ref-type="bibr" rid="B45">Yao et al., 2022</xref>). The resolution of captured photos can reach up to 8000 &#xd7; 6000, and its high pixel count can meet the data requirements of the experiment. In addition, the excellent endurance and stable flight speed of this UAV can ensure the efficiency of image acquisition.</p>
<p>The flight path of DJ Mavic Air 2 is shown in <xref ref-type="fig" rid="F6">Figure 6</xref>.<list list-type="simple">
<list-item>
<p>1) Flew along the path A-B-C-D on the upper bridge deck, then flew to the lower bridge deck to shoot along the same route. After shooting the bridge deck, retrieved the UAV.</p>
</list-item>
<list-item>
<p>2) Flew from the top point E on one side of the bridge tower pylon to the bottom endpoint F, shot the four sides of the pylon back and forth, then flew to another pylon and flew up from the bottom endpoint G to the top point H. Repeated the aforementioned operation until all the pylons were shot and retrieved the UAV (this path was E-F-G-H).</p>
</list-item>
<list-item>
<p>3) The bridge pier shooting path (I-J-K-L) was similar to the bridge tower. Started flying from the top point I on one side of a bridge pier to the bottom endpoint J. After shooting four sides, flew to the next pier and repeated the aforementioned shooting operation. Finally, retrieved the UAV.</p>
</list-item>
</list>
</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>UAV shooting route diagram.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g006.tif"/>
</fig>
<p>After each retrieval of the UAV, the acquired image information was read, and the images were numbered in the order in which they were taken. To obtain clear images of cracks, raw images with occlusions were consciously avoided, and images with occlusions were carefully screened and removed during the dataset creation process.</p>
</sec>
<sec id="s3-2">
<title>3.2 Dataset creation</title>
<p>In this experiment, a total of 466 raw images were acquired by DJ Mavic Air 2. In order to improve the training efficiency of the model, this article does some processing on the raw images. A raw image with a resolution of 8000 &#xd7; 6000 was segmented into 713 sample images of 256 &#xd7; 256, and then 10,000 crack images that meet the criteria were selected as training sample images among the sample images according to the image quality ranking from high to low to create the QL_CRACK dataset. To determine the location of the crack images in the bridge structure, the images were named according to &#x201c;bridge structure - raw image number - segmented image number&#x201d;. The bridge structure includes a bridge deck (BD), a bridge tower (BT), and a bridge pier (BP). After obtaining the QL_CRACK dataset, LabelImg software was used to annotate the image crack areas, and 10,000 annotation files in XML format were obtained after annotation. The COCO2017 public dataset and the publicly available CRACK500 crack dataset were downloaded from the internet as the datasets for transfer learning, with a total of 163,957 images in the COCO2017 dataset and 3,368 images in the CRACK500 dataset. To evaluate the generalization ability of the YOLOv7-CD model, 80% of the 10,000 images were used as the training and validation sets (with 80% of the training set and 20% of the validation set), and 20% were used as the test set according to the five-fold cross-validation principle.</p>
</sec>
<sec id="s3-3">
<title>3.3 Model training</title>
<p>The training process of the network model in this study was implemented in the Pytorch deep learning framework built in Windows 11, NVIDIA GeForce RTX 3060 was used for the GPU, AMD Ryzen 7 5800H with Radeon Graphics at 3.20 GHz was used for the CPU, CUDA11.0 and CUDNN8.0 were selected for the calculation platform, RAM specification was 16 GB, and the Deep learning framework was built by PyTorch 1.7.1. Development environment was based on Visual Studio Code 1.73, python3.9.</p>
<p>When training the model, the batch size can only be set to 2, 4, and 8 due to the limitation of the experimental platforms, and a small batch size setting can lead to a large model error and slow down the training speed during the batch normalization operation (<xref ref-type="bibr" rid="B9">Ioffe and Szegedy, 2015</xref>; <xref ref-type="bibr" rid="B35">Wu and He, 2018</xref>). However, the TSTL approach adopted in this study can solve this problem well. As shown in <xref ref-type="fig" rid="F5">Figure 5</xref>, the approach first trained on the COCO2017 dataset to obtain the initial weights; then froze the backbone network and trained 50 epochs on the CRACK500 dataset to obtain the pre-training weights; and finally froze the backbone network to train 50 epochs on the QL_CRACK dataset and then unfroze it to train 250 epochs.</p>
<p>In order to compare the performance of the YOLOv7-CD model under different conditions, a total of 16 sets of working conditions were set up for comparison experiments, which were (SGD10<sup>&#x2212;2.0</sup>)LR-(2 or 4 or 8)BS-(0)TL, (Adam10<sup>&#x2212;2.0</sup>)LR-(2 or 4 or 8)BS-(0)TL, (SGD10<sup>&#x2212;5.0</sup>)LR-(4)BS-(0)TL, (SGD10<sup>&#x2212;4.0</sup>)LR-(4)BS-(0)TL, (SGD10<sup>&#x2212;3.0</sup>)LR-(4)BS-(0)TL, (SGD10<sup>&#x2212;1.0</sup>)LR-(4)BS-(0)TL, (Adam10<sup>&#x2212;5.0</sup>)LR-(4)BS-(0)TL, (Adam10<sup>&#x2212;4.0</sup>)LR-(4)BS-(0)TL, (Adam10<sup>&#x2212;3.0</sup>)LR-(4)BS-(0)TL, (Adam10<sup>&#x2212;1.0</sup>)LR-(4)BS-(0)TL, and (Adam10<sup>&#x2212;3.0</sup>)LR-(4)BS-(1 or 2)TL. &#x201c;LR&#x201d; and &#x201c;BS&#x201d; represent the learning rate and batch size, respectively. &#x201c;SGD&#x201d; and &#x201c;Adam&#x201d; are shorthand for the stochastic gradient descent algorithm and the adaptive moment estimation algorithm, respectively. The corresponding parameters are in the front brackets. For example, &#x201c;(SGD10<sup>&#x2212;2.0</sup>)LR-(2)BS&#x201d; indicates that the SGD learning rate optimization algorithm is chosen with an initial learning rate of 10<sup>&#x2212;2</sup> and a batch size of 2. &#x201c;(0)TL&#x201d; denotes TSTL training, &#x201c;(1)TL&#x201d; denotes no second stage of transfer learning training, and &#x201c;(2)TL&#x201d; denotes no TSTL was performed.</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s4">
<title>4 Results and discussion</title>
<p>In this article, Precision, Recall, F1, and AP were selected as accuracy evaluation indexes, and frames per second (FPS) were selected as model inference speed evaluation indexes to analyze the performance of the YOLOv7-CD model.</p>
<p>Before introducing the evaluation metrics, we should first introduce the confusion matrix. The confusion matrix itself is a rough evaluation of the prediction results, which can give us a macro understanding of the prediction results and the original data. We will also use the data in the confusion matrix to calculate the evaluation index.</p>
<p>The confusion matrix has four compartments that contain all the possible scenarios of the prediction result when we make a binary prediction.</p>
<p>True Positive (abbreviated as TP) means that the sample is actually Positive and the model predicts the sample as Positive.</p>
<p>True Negative (abbreviated as TN) means that the sample is actually Negative and the model predicts the sample as Negative.</p>
<p>False Positive (abbreviated as FP) means that the sample is actually Negative, but the model predicts it as Positive.</p>
<p>False Negative (abbreviated as FN) means that the sample is actually Positive, but the model predicts it to be Negative.</p>
<p>Precision, also known as the check rate, indicates the proportion of samples predicted to be Positive that are actually Positive. Precision can be seen as a measure of quality. Higher precision means that an algorithm returns more relevant results than irrelevant ones.</p>
<p>The formula is:<disp-formula id="e14">
<mml:math id="m60">
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mtext>TP</mml:mtext>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:mtext>FP</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
</p>
<p>Recall, also known as the rate of checking for completeness, indicates the proportion of the actual positive samples in the full sample that are correctly predicted as positive. Recall can be seen as a measure of quantity. High recall means that an algorithm returns most of the relevant results (whether or not irrelevant ones are also returned).</p>
<p>The formula is:<disp-formula id="e15">
<mml:math id="m61">
<mml:mrow>
<mml:mtext>Recall</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mtext>TP</mml:mtext>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:mtext>FN</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>AP is the area under the Precision-Recall curve; usually, the better a classifier is, the higher its AP value.</p>
<p>F1 is a weighted average of precision and recall.</p>
<p>The formula for F1 is as follows:<disp-formula id="e16">
<mml:math id="m62">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi mathvariant="normal">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="normal">R</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
</p>
<sec id="s4-1">
<title>4.1 Comparative results of YOLOv7 and YOLOv7-CD</title>
<p>To verify the effectiveness of the improvement of the YOLOv7 model, the AP and FPS before and after the improvement with default parameters (batch size &#x3d; 4, initial learning rate &#x3d; 0.01 and SGD learning rate optimization algorithm) were compared, and the comparison results are shown in <xref ref-type="table" rid="T1">Table 1</xref>. Although the inference speed did not change after the model improvement, Precision, Recall, F1, and AP were improved. Among them, Precision improved by 5.50%, Recall improved by 4.24%, F1 improved by 0.09, and AP improved by 3.19%. This indicates that integrating the SE attention module and introducing the Focal-EIoU loss function in the YOLOv7 model can improve detection accuracy. The AP value before model improvement is 94.60% and the AP value after model improvement is 97.79%.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Performance comparison before and after model improvement.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">F1</th>
<th align="center">AP (%)</th>
<th align="center">FPS</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">YOLOv7</td>
<td align="center">89.20</td>
<td align="center">92.07</td>
<td align="center">0.86</td>
<td align="center">94.60</td>
<td align="center">52</td>
</tr>
<tr>
<td align="center">YOLOv7-CD</td>
<td align="center">94.70</td>
<td align="center">96.31</td>
<td align="center">0.95</td>
<td align="center">97.79</td>
<td align="center">52</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4-2">
<title>4.2 Hyperparameter optimization results</title>
<p>The parameters of the neural network are the internal variables of the neural network model, such as weights (w) and bias (b), and they can be obtained by training. The hyperparameters of the neural network are the external parameters of the model, such as learning rate, batch size, number of hidden layers, number of hidden layer units, activation function, momentum, etc. These parameters cannot be obtained from training and must be set manually, and they will affect the values of the obtained parameters w and b. The hyperparameters of the neural network have an important impact on the prediction results, and the appropriate hyperparameters will greatly improve the accuracy and efficiency of the network model.</p>
<p>In structured data, optimizing the batch size and learning rate are effective ways to achieve good performance in deep learning networks. Within a certain range, increasing batch size can improve convergence stability and reduce training time, but as the batch size increases, the number of iterations per epoch decreases, and the model&#x2019;s accuracy decreases accordingly. The impact of the learning rate on model performance is reflected in two aspects: the size of the initial learning rate and the optimization algorithm for the learning rate. The initial learning rate usually has an optimal value. When the initial learning rate is too small, the model converges slowly, and when it is too large, the model does not converge. The convergence of the model will be different when different learning rate optimization algorithms are chosen. In this article, two optimizers, SGD and Adam, were used to investigate (<xref ref-type="bibr" rid="B27">Shafi and Assad, 2023</xref>).</p>
<p>In order to make the model more suitable for bridge crack detection, this method conducts comparison experiments on three variables, batch size, learning rate, and optimization algorithm. In the experiments, the minimum learning rate was always 0.01 times the initial learning rate. A loss function is used to determine the convergence of the model during the hyperparameter optimization.</p>
<sec id="s4-2-1">
<title>4.2.1 Batch size optimization</title>
<p>The larger the batch size, the higher the GPU performance requirements and the batch size is usually a power of 2 (<xref ref-type="bibr" rid="B3">Dong et al., 2021</xref>). Therefore, in this experiment, the batch size was set to 2, 4, and 8 based on the actual hardware configuration. And the performance of two learning rate optimization algorithms, SGD and Adam, was compared simultaneously. To ensure the reliability of batch size optimization, the initial learning rate was set to 0.01, and 50 epochs were trained in the freezing phase and 250 epochs in the thawing phase on the QL_CRACK dataset. <xref ref-type="fig" rid="F7">Figure 7</xref> shows the relationship between Precision, Recall, F1, and AP with different batch sizes for different learning rate optimization algorithms. The blue color represents the SGD optimization algorithm, and the grey color represents the Adam optimization algorithm. The results are also summarized in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Variation of Precision, Recall, F1, and AP with batch size for different learning rate optimization algorithms: <bold>(A)</bold> Variation of Precision with batch size; <bold>(B)</bold> Variation of Recall with batch size; <bold>(C)</bold> Variation of F1 with batch size; <bold>(D)</bold> Variation of AP with batch size.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g007.tif"/>
</fig>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Precision, Recall, F1, and AP for different working conditions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Number</th>
<th align="center">Working condition</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">F1</th>
<th align="center">AP (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">(SGD10<sup>&#x2212;2.0</sup>)LR-(2)BS-(0)TL</td>
<td align="center">94.11</td>
<td align="center">95.66</td>
<td align="center">0.95</td>
<td align="center">96.71</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">(SGD10<sup>&#x2212;2.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">94.70</td>
<td align="center">96.31</td>
<td align="center">0.95</td>
<td align="center">97.79</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">(SGD10<sup>&#x2212;2.0</sup>)LR-(8)BS-(0)TL</td>
<td align="center">95.51</td>
<td align="center">95.66</td>
<td align="center">0.96</td>
<td align="center">97.82</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">(Adam10<sup>&#x2212;2.0</sup>)LR-(2)BS-(0)TL</td>
<td align="center">93.92</td>
<td align="center">94.86</td>
<td align="center">0.94</td>
<td align="center">96.88</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">(Adam10<sup>&#x2212;2.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">94.99</td>
<td align="center">96.56</td>
<td align="center">0.96</td>
<td align="center">97.66</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">(Adam10<sup>&#x2212;2.0</sup>)LR-(8)BS-(0)TL</td>
<td align="center">94.40</td>
<td align="center">96.80</td>
<td align="center">0.96</td>
<td align="center">97.76</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Combined with <xref ref-type="fig" rid="F7">Figure 7</xref> and <xref ref-type="table" rid="T2">Table 2</xref>, it can be seen that Precision is the largest for Number 3 and Number 5, with 95.51% and 94.99%, respectively; Recall is the largest for Number 5 and Number 6, with 96.56% and 96.80%, respectively; F1 is the largest for Number 3, Number 5 and Number 6, all with 0.96. When the batch size is 4 and 8, the precision, recall, and F1 are all higher than when the batch size is 2.</p>
<p>When the batch size is increased from 2 to 4, the AP for SGD and Adam optimization algorithms improved by 1.08% and 0.76%, respectively, while when the batch size is increased from 4 to 8, the AP only improved by 0.03% and 0.1%, respectively. This indicates that the AP obtained by different optimization algorithms (SGD and Adam) have similar trends with the batch size, both of which have a large change in the batch size from 2 to 4 and a small change in the batch size from 4 to 8. The AP for the SGD and Adam optimization algorithms are the largest at a batch size of 8, but the difference with a batch size of 4 is small. Considering both the saving of video memory and the speed of training, the batch size of the freezing phase was set to 8, and the training batch size of the thawing phase was set to 4 during the experiment.</p>
</sec>
<sec id="s4-2-2">
<title>4.2.2 Learning rate optimization</title>
<p>The learning rate affects how fast the algorithm converges to the regionally minimal value. A suitable learning rate allows the algorithm to descend in the direction of the maximum gradient in appropriate steps, and the learning rate can be effectively optimized by the decreasing gradient of the loss function. Since the YOLOv7 model uses the learning rate optimization algorithm, only the initial learning rate and the optimization algorithm can be considered in the optimization (<xref ref-type="bibr" rid="B33">Wang et al., 2023b</xref>).</p>
<p>In this manuscript, the range of the initial learning rate was set from 10<sup>&#x2212;5.0</sup> to 10<sup>&#x2212;1.0</sup>, with successive values set in sequence at a factor of 10 (<xref ref-type="bibr" rid="B20">Mayr et al., 2018</xref>; <xref ref-type="bibr" rid="B37">Xu et al., 2023b</xref>; <xref ref-type="bibr" rid="B44">Yao et al., 2023b</xref>). The minimum learning rate was set to 0.01 times the initial learning rate. The batch size follows the optimal solution mentioned above (batch size &#x3d; 4), and the Epoch is set to 100. <xref ref-type="table" rid="T3">Table 3</xref> shows the training loss under different working conditions. <xref ref-type="fig" rid="F8">Figures 8A, B</xref> show the convergence of the loss function values with the initial learning rate for the SGD and Adam optimization algorithms, respectively. From <xref ref-type="fig" rid="F8">Figure 8A</xref>, it can be seen that the loss function converges fastest when the initial learning rate is set to 10<sup>&#x2212;3</sup> when the SGD optimization algorithm is used, and the loss function fluctuates more in the early stage when the initial learning rate is 10<sup>&#x2212;1</sup>, indicating that the learning rate is set too large at this time and the model does not converge well. From <xref ref-type="fig" rid="F8">Figure 8B</xref>, it can be seen that the convergence curves of the loss function are smooth when the Adam optimization algorithm is used, but in the convergence process, the loss function values of each working condition have the following relationships: Number 14 &#x3e; Numbers 5, 11, and 12 &#x3e; Number 13. When the initial learning rate is 10<sup>&#x2212;3</sup>, the model convergence effect is obviously better than other working conditions.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Train loss under different working conditions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Number</th>
<th align="center">Working condition</th>
<th align="center">Train loss</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">7</td>
<td align="center">(SGD10<sup>&#x2212;5.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0221</td>
</tr>
<tr>
<td align="center">8</td>
<td align="center">(SGD10<sup>&#x2212;4.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0187</td>
</tr>
<tr>
<td align="center">9</td>
<td align="center">(SGD10<sup>&#x2212;3.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0175</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">(SGD10<sup>&#x2212;2.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0194</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">(SGD10<sup>&#x2212;1.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0224</td>
</tr>
<tr>
<td align="center">11</td>
<td align="center">(Adam10<sup>&#x2212;5.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0135</td>
</tr>
<tr>
<td align="center">12</td>
<td align="center">(Adam10<sup>&#x2212;4.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0130</td>
</tr>
<tr>
<td align="center">13</td>
<td align="center">(Adam10<sup>&#x2212;3.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0100</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">(Adam10<sup>&#x2212;2.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0129</td>
</tr>
<tr>
<td align="center">14</td>
<td align="center">(Adam10<sup>&#x2212;1.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">0.0175</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Learning rate optimization process: <bold>(A)</bold> loss variation of SGD optimization algorithm with different initial learning rates; <bold>(B)</bold> loss variation of Adam optimization algorithm with different initial learning rates; <bold>(C)</bold> Variation of the loss function with initial learning rate.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g008.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="F8">Figure 8C</xref> shows the variation of the loss function with the initial learning rate when the SGD and Adam optimization algorithms were used. Combining <xref ref-type="fig" rid="F8">Figure 8C</xref> and <xref ref-type="table" rid="T3">Table 3</xref>, it can be seen that the minimum loss is 0.0175 for an initial learning rate of 10<sup>&#x2212;3</sup>, and the maximum is 0.0224 for an initial learning rate of 10<sup>&#x2212;1</sup> when the SGD optimization algorithm was used. And the minimum loss is 0.0100 for an initial learning rate of 10<sup>&#x2212;3</sup>, and the maximum is 0.0175 for an initial learning rate of 10<sup>&#x2212;1</sup> when the Adam optimization algorithm was used. The trend of the loss curve with the initial learning rate is similar for the SGD and Adam optimization algorithms, both decreasing first and then increasing, and there is an optimal initial learning rate. In addition, the loss functions of the Adam optimization algorithm are lower than those of the SGD optimization algorithm. The optimal result is that the learning rate optimization algorithm is set to Adam, and the initial learning rate is set to 0.001.</p>
</sec>
</sec>
<sec id="s4-3">
<title>4.3 Transfer learning results</title>
<p>In order to demonstrate the enhancement effect brought by the TSTL approach, a comparison experiment was conducted. The AP under different working conditions is counted in <xref ref-type="table" rid="T4">Table 4</xref>. &#x201c;(0)TL&#x201d; represents TSTL, &#x201c;(1)TL&#x201d; represents only transfer learning stage 1, and &#x201c;(2)TL&#x201d; represents no transfer learning. The AP of YOLOv7-CD is 98.01% with TSTL, 97.75% with only transfer learning stage 1, and 96.23% with the model without transfer learning.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>AP under different transfer learning conditions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Number</th>
<th align="center">Working condition</th>
<th align="center">AP (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">13</td>
<td align="center">(Adam10<sup>&#x2212;3.0</sup>)LR-(4)BS-(0)TL</td>
<td align="center">98.01</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">(Adam10<sup>&#x2212;3</sup>.0)LR-(4)BS-(1)TL</td>
<td align="center">97.75</td>
</tr>
<tr>
<td align="center">16</td>
<td align="center">(Adam10<sup>&#x2212;3.0</sup>)LR-(4)BS-(2)TL</td>
<td align="center">96.23</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>AP of the YOLOv7-CD model with TSTL is 98.01%, AP of the YOLOv7-CD model with only transfer learning stage 1 is 97.75%, AP of the YOLOv7-CD model without transfer learning is 96.23%. For more visual observation of the effect of transfer learning, <xref ref-type="fig" rid="F9">Figure 9</xref> shows the convergence of the loss function for different transfer learning cases. It can be seen from <xref ref-type="fig" rid="F9">Figure 9</xref> that there is a relationship of &#x201c;(0)TL &#x3c; (1)TL &#x3c; (2)TL&#x201d; in the training loss during convergence, indicating that both phases of transfer learning can reduce the value of the loss function of the model and thus improve the model accuracy.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>YOLOv7-CD model convergence process under different transfer learning cases.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g009.tif"/>
</fig>
</sec>
<sec id="s4-4">
<title>4.4 Crack detection results</title>
<p>In YOLO, confidence is a value between 0 and 1 that indicates how sure the model is about the detected target (<xref ref-type="bibr" rid="B56">Yang et al., 2022c</xref>; <xref ref-type="bibr" rid="B58">Yang et al., 2022d</xref>; <xref ref-type="bibr" rid="B55">Yang et al., 2023a</xref>; <xref ref-type="bibr" rid="B57">Yang et al., 2023b</xref>; <xref ref-type="bibr" rid="B59">Yang et al., 2023c</xref>; <xref ref-type="bibr" rid="B54">Yang et al., 2024</xref>). This method has a good effect on bridge crack detection, and some of the detection results are shown in <xref ref-type="fig" rid="F10">Figure 10</xref>. The naming rule in the figure is &#x201c;bridge structure location - original image number - segmented image number&#x201d;. For example, &#x201c;BD-095-164&#x201d; means the crack is located in the 164th segmented image of the 95th captured image of the bridge deck. This is a good way to determine the location of the crack in the bridge structure. The red box in the image indicates the location of the crack, and the confidence level is marked in the lower left corner.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Bridge crack detection results: <bold>(A)</bold> BD-095-164; <bold>(B)</bold> BD-095-168; <bold>(C)</bold> BD-107-017; <bold>(D)</bold> BD-105-13; <bold>(E)</bold> BP-400-093; <bold>(F)</bold> BP-400-094; <bold>(G)</bold> BT-380-283; <bold>(H)</bold> BT-380-059.</p>
</caption>
<graphic xlink:href="fmats-11-1351938-g010.tif"/>
</fig>
<p>From <xref ref-type="fig" rid="F10">Figure 10</xref>, it can be seen that YOLOv7-CD model has a good detection effect on transverse cracks, vertical cracks, and oblique cracks. The mean confidence rate is 0.83, ranging from 0.60 to 0.96. The cracks in <xref ref-type="fig" rid="F10">Figures 10A, E</xref>, and <xref ref-type="fig" rid="F10">Figure 10H</xref> all have certain curvature, similar to U-shape, and their confidence levels are above 0.9. For the oblique cracks, whether it is the southwest-northeast-trending crack in <xref ref-type="fig" rid="F10">Figures 10B, G</xref> or the northwest-southeast-trending crack &#x201c;crack-2&#x201d; in <xref ref-type="fig" rid="F10">Figure 10D</xref>, the confidence level is above 0.8. In contrast, for the transverse crack in <xref ref-type="fig" rid="F10">Figure 10F</xref> and the vertical crack in <xref ref-type="fig" rid="F10">Figure 10C</xref>, the confidence levels are below 0.8. The lowest confidence level is for transverse and vertical cracks, higher for diagonal cracks, and the highest confidence level is for U-shaped cracks, indicating that the model believes that bridge cracks should be more irregularly oriented through crack feature learning. Two cracks are present in <xref ref-type="fig" rid="F10">Figure 10D</xref>, and the model accurately identifies the number and extent of cracks with confidence levels of 0.70 and 0.83. For fine cracks, the confidence level is 0.92 in <xref ref-type="fig" rid="F10">Figure 10A</xref> and 0.64 in <xref ref-type="fig" rid="F10">Figure 10C</xref>; for wide cracks, the confidence level is 0.93 in <xref ref-type="fig" rid="F10">Figure 10E</xref> and 0.79 in <xref ref-type="fig" rid="F10">Figure 10F</xref>. This indicates that the width of the crack does not have a significant effect on the confidence level of crack detection. Overall, YOLOv7-CD can quickly and accurately locate and identify cracks in the acquired images.</p>
</sec>
<sec id="s4-5">
<title>4.5 Comparison of different models</title>
<p>The improved model in this study was compared with four currently common target detection networks (YOLOv4, YOLOv5m, YOLOv4-tiny, and MobileNet-SSD) for experiments. The AP, number of parameters (&#x23;Param), computation volume (FLOPs), and FPS were used as evaluation metrics, and the comparison results are tallied in <xref ref-type="table" rid="T5">Table 5</xref>.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Performance comparison of different target detection models.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Model</th>
<th align="center">AP (%)</th>
<th align="center">&#x23;Param (M)</th>
<th align="center">FLOPs (GMacs)</th>
<th align="center">FPS</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">YOLOv4</td>
<td align="center">95.50</td>
<td align="center">64.00</td>
<td align="center">63.92</td>
<td align="center">16</td>
</tr>
<tr>
<td align="center">YOLOv5m</td>
<td align="center">85.58</td>
<td align="center">21.40</td>
<td align="center">51.30</td>
<td align="center">39</td>
</tr>
<tr>
<td align="center">YOLOv4-tiny</td>
<td align="center">72.22</td>
<td align="center">5.90</td>
<td align="center">4.31</td>
<td align="center">56</td>
</tr>
<tr>
<td align="center">MobileNet-SSD</td>
<td align="center">84.28</td>
<td align="center">8.85</td>
<td align="center">12.40</td>
<td align="center">48</td>
</tr>
<tr>
<td align="center">YOLOv7-CD</td>
<td align="center">98.01</td>
<td align="center">37.65</td>
<td align="center">17.04</td>
<td align="center">52</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From <xref ref-type="table" rid="T5">Table 5</xref>, the AP of YOLOv4, YOLOv5m, YOLOv4-tiny, MobileNet-SSD, and YOLOv7-CD are 95.5%, 85.58%, 72.22%, 84.28%, and 98.01%, respectively. The prediction accuracy of YOLOv7-CD is higher than the remaining four target detection models. The parameters of YOLOv4, YOLOv5m, YOLOv4-tiny, MobileNet-SSD, and YOLOv7-CD are 64, 21.4, 5.9, 8.85, and 37.65 M, respectively; the computational volumes were 63.92, 51.3, 4.31, 12.4, and 17.04 GMacs, respectively; the ratios of parameter number to computational volume were 1.00, 0.42, 1.37, 0.71, and 2.21, respectively.</p>
<p>YOLOv7-CD has the largest ratio of parameters to computation volume, and its higher number of parameters maintains a lower computation volume, which will ensure its fast inference capability to a certain extent. The FPS of YOLOv4, YOLOv5m, YOLOv4-tiny, MobileNet-SSD, and YOLOv7-CD are 16, 39, 56, 48, and 52, respectively. YOLOv4-tiny has the fastest inference speed of 56, followed by 52 for YOLOv7-CD. In terms of inference speed, YOLOv7-CD is 7.6% lower than YOLOv4-tiny, but in terms of AP, YOLOv7-CD is 25.79% higher than YOLOv4-tiny. Therefore, among the five object detection models, YOLOv7-CD has the best comprehensive performance in both prediction accuracy and inference speed.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>In response to the current problems in intelligent bridge crack detection, such as difficulties in acquiring high-quality crack images, long inference time of network models, and detection accuracy in need of improvement, this article proposes an IIRTCDMB based on UAVIAT and IITDT.</p>
<p>The method proposed in this article can effectively detect cracks with different morphologies and complex backgrounds and has strong robustness as well as background noise filtering capability, which can reduce the problems of noise interference and blurring of UAV images due to the influence of environmental conditions. The adopted crack image naming method can quickly find out the actual location where the cracks appear and maintain them in time.</p>
<p>In order to verify the excellent performance of this method, it was applied to the Cuntan Yangtze River Bridge, and the following conclusions were obtained:<list list-type="simple">
<list-item>
<p>1) The YOLOv7-CD model, which integrates the SE attention module and introduces the Focal EIOU loss function, has an AP improvement of 3.19% compared to the original YOLOv7 model. The comparison experiments of transfer learning show that both stages of transfer learning can reduce the loss of model convergence.</p>
</list-item>
<list-item>
<p>2) The hyperparameter optimization of the YOLOv7-CD model can reduce the model loss to a certain degree and improve the accuracy of the model in detecting bridge cracks. The model performs best when the batch size is 8, the initial learning rate is 0.001, and the learning rate optimization algorithm is Adam. Because the AP difference between the batch size of 4 and 8 is small, the batch size of the freezing phase is set to 8, and the batch size of the thawing phase is set to 4 in order to save computational performance.</p>
</list-item>
<list-item>
<p>3) In the crack localization and identification results, the confidence level of horizontal and vertical cracks is the lowest, the confidence level of oblique cracks is higher, and the confidence level of U-shaped cracks is the highest, which indicates that the model believes that the bridge cracks should be more irregularly oriented through crack feature learning. The average value of the crack detection confidence is 0.83. Overall, the YOLOv7-CD model can quickly and accurately perform crack location and identification on the acquired images.</p>
</list-item>
<list-item>
<p>4) Under the conditions of this article, the AP of the YOLOv7-CD model is 98.01%, and the FPS is 52. Its comprehensive performance is all the better than the current popular target detection models YOLOv4, YOLOv5m, YOLOv4-tiny, and MobileNet-SSD.</p>
</list-item>
</list>
</p>
<p>In conclusion, the method proposed in this paper solves the current problems of difficult crack image acquisition and high cost of image labeling while improving the performance of the model. The model can focus on the relationship between pixels, improve the robustness of the model, and reduce the time cost of sample labeling. In the future, the model can be further optimized to improve the segmentation ability of the model for crack edges and small cracks, and to improve the generalization ability of the model.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The data analyzed in this study is subject to the following licenses/restrictions: The datasets used and analyzed during the current study are available from the corresponding author on reasonable request. Requests to access these datasets should be directed to GY, <email>yaogang@cqu.edu.cn</email>.</p>
</sec>
<sec id="s7">
<title>Author contributions</title>
<p>YY: Writing&#x2013;review and editing, Conceptualization, Methodology. LL: Writing&#x2013;original draft, Formal Analysis, Software. GY: Writing&#x2013;review and editing, Conceptualization, Methodology. HD: Writing&#x2013;original draft. YC: Writing&#x2013;original draft. LW: Writing&#x2013;original draft.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This research was funded by Chongqing City Infrastructure Construction Investment Co., Ltd. Grant number CQCT-JSA-GC-2021-0140 and Central Universities Basic Research Operating Expenses (2023CDJXY-031).</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The reviewer [XL] declared a shared affiliation with the authors to the handling editor at the time of review.</p>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This research was funded by Chongqing City Infrastructure Construction Investment Co., Ltd. Grant number CQCT-JSA-GC-2021-0140 and Central Universities Basic Research Operating Expenses (2023CDJXY-031). The funder had the following involvement in the study: data collection and analysis.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>L. K.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>W. X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhai</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X. L.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>L. L.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Convolutional neural networks (CNNs)-based multi-category damage detection and recognition of high-speed rail (HSR) reinforced concrete (RC) bridges using test images</article-title>. <source>Eng. Struct.</source> <volume>276</volume>, <fpage>115306</fpage>. <pub-id pub-id-type="doi">10.1016/j.engstruct.2022.115306</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>X. X.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>C. C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Bridge damage detection and recognition based on deep learning</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>1626</volume> <fpage>012151</fpage> <pub-id pub-id-type="doi">10.1088/1742-6596/1626/1/012151</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname>
<given-names>X. P.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W. G.</given-names>
</name>
<name>
<surname>Shao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ling</surname>
<given-names>H. B.</given-names>
</name>
<name>
<surname>Porikli</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Dynamical hyperparameter optimization via deep reinforcement learning in tracking</article-title>. <source>Ieee Trans. Pattern Analysis Mach. Intell.</source> <volume>43</volume> (<issue>5</issue>), <fpage>1515</fpage>&#x2013;<lpage>1529</lpage>. <pub-id pub-id-type="doi">10.1109/tpami.2019.2956703</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname>
<given-names>Y. C.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Z. H.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>F. W.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Pavement distress detection and classification based on YOLO network</article-title>. <source>Int. J. Pavement Eng.</source> <volume>22</volume> (<issue>13</issue>), <fpage>1659</fpage>&#x2013;<lpage>1672</lpage>. <pub-id pub-id-type="doi">10.1080/10298436.2020.1714047</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duque</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Seo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wacker</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Bridge deterioration quantification protocol using UAV</article-title>. <source>J. Bridge Eng.</source> <volume>23</volume> (<issue>10</issue>). <pub-id pub-id-type="doi">10.1061/(asce)be.1943-5592.0001289</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Eisenbach</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Stricker</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Seichter</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Amende</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Debes</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Sesselmann</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). &#x201c;<article-title>How to get pavement distress detection ready for deep learning? A systematic approach</article-title>,&#x201d; in <conf-name>Proceedings of the 2017 International Joint Conference on Neural Networks (IJCNN)</conf-name>, <conf-loc>Anchorage, AK, USA</conf-loc>, <conf-date>May 2017</conf-date>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ge</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Dan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>An accurate and robust monitoring method of full-bridge traffic load distribution based on YOLO-v3 machine vision</article-title>. <source>Struct. Control Health Monit.</source> <volume>27</volume> (<issue>12</issue>). <pub-id pub-id-type="doi">10.1002/stc.2636</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guan</surname>
<given-names>H. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Y. T.</given-names>
</name>
<name>
<surname>Chapman</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H. Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Iterative tensor voting for pavement crack extraction using mobile laser scanning data</article-title>. <source>Ieee Trans. Geoscience Remote Sens.</source> <volume>53</volume> (<issue>3</issue>), <fpage>1527</fpage>&#x2013;<lpage>1537</lpage>. <pub-id pub-id-type="doi">10.1109/tgrs.2014.2344714</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Ioffe</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Szegedy</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Batch normalization: accelerating deep network training by reducing internal covariate shift</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1502.03167">https://arxiv.org/abs/1502.03167</ext-link>.</comment>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>Q. L.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>Q. B.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>N. A.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>Y. F.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A deep learning and morphological method for concrete cracks detection</article-title>. <source>J. Circuits Syst. Comput.</source> <volume>32</volume>. <pub-id pub-id-type="doi">10.1142/s0218126623502717</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kao</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>Y. C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F. L.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Combining the YOLOv4 deep learning model with UAV imagery processing technology in the extraction and quantization of cracks in bridges</article-title>. <source>Sensors</source> <volume>23</volume> (<issue>5</issue>), <fpage>2572</fpage>. <pub-id pub-id-type="doi">10.3390/s23052572</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kao</surname>
<given-names>S.-P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.-L.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>J.-S.</given-names>
</name>
<name>
<surname>Tsai</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chu</surname>
<given-names>Y.-D.</given-names>
</name>
<name>
<surname>Hung</surname>
<given-names>P.-S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Bridge crack inspection efficiency of an unmanned aerial vehicle system with a laser ranging module</article-title>. <source>Sensors</source> <volume>22</volume> (<issue>12</issue>), <fpage>4469</fpage>. <pub-id pub-id-type="doi">10.3390/s22124469</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khaloo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lattanzi</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Cunningham</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Dell&#x27;Andrea</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Riley</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Unmanned aerial vehicle inspection of the Placer River Trail Bridge through image-based 3D modelling</article-title>. <source>Struct. Infrastructure Eng.</source> <volume>14</volume> (<issue>1</issue>), <fpage>124</fpage>&#x2013;<lpage>136</lpage>. <pub-id pub-id-type="doi">10.1080/15732479.2017.1330891</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>I. H.</given-names>
</name>
<name>
<surname>Jeon</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Baek</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>W. H.</given-names>
</name>
<name>
<surname>Jung</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Application of crack identification techniques for an aging concrete bridge inspection using an unmanned aerial vehicle</article-title>. <source>Sensors</source> <volume>18</volume> (<issue>6</issue>), <fpage>1881</fpage>. <pub-id pub-id-type="doi">10.3390/s18061881</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>I.-H.</given-names>
</name>
<name>
<surname>Yoon</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Jung</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cho</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jung</surname>
<given-names>H.-J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A comparative study of bridge inspection and condition assessment between manpower and a UAS</article-title>. <source>Drones</source> <volume>6</volume> (<issue>11</issue>), <fpage>355</fpage>. <pub-id pub-id-type="doi">10.3390/drones6110355</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Multiobject real-time automatic detection method for production quality control of prefabricated laminated slabs</article-title>. <source>J. Constr. Eng. Manag.</source> <volume>150</volume> (<issue>3</issue>), <fpage>05023017</fpage>. <pub-id pub-id-type="doi">10.1061/jcemd4.coeng-14089</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>T.-Y.</given-names>
</name>
<name>
<surname>Goyal</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Girshick</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Dollar</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Focal loss for dense object detection</article-title>. <source>IEEE Trans. pattern analysis Mach. Intell.</source> <volume>42</volume> (<issue>2</issue>), <fpage>318</fpage>&#x2013;<lpage>327</lpage>. <pub-id pub-id-type="doi">10.1109/tpami.2018.2858826</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>L. S.</given-names>
</name>
<name>
<surname>Ke</surname>
<given-names>C. Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Research on pedestrian detection algorithm based on MobileNet-YoLo</article-title>. <source>Comput. Intell. Neurosci.</source> <volume>2022</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1155/2022/8924027</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.-F.</given-names>
</name>
<name>
<surname>Nie</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>J.-S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.-G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Image-based crack assessment of bridge piers using unmanned aerial vehicles and three-dimensional scene reconstruction</article-title>. <source>Computer-Aided Civ. Infrastructure Eng.</source> <volume>35</volume> (<issue>5</issue>), <fpage>511</fpage>&#x2013;<lpage>529</lpage>. <pub-id pub-id-type="doi">10.1111/mice.12501</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mayr</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Klambauer</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Unterthiner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Steijaert</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wegner</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Ceulemans</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Large-scale comparison of machine learning methods for drug target prediction on ChEMBL</article-title>. <source>Chem. Sci.</source> <volume>9</volume> (<issue>24</issue>), <fpage>5441</fpage>&#x2013;<lpage>5451</lpage>. <pub-id pub-id-type="doi">10.1039/c8sc00148k</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Poobal</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Crack detection using image processing: a critical review and analysis</article-title>. <source>Alexandria Eng. J.</source> <volume>57</volume> (<issue>2</issue>), <fpage>787</fpage>&#x2013;<lpage>798</lpage>. <pub-id pub-id-type="doi">10.1016/j.aej.2017.01.020</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Perry</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Atadero</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>van de Lindt</surname>
<given-names>J. W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Streamlined bridge inspection system utilizing unmanned aerial vehicles (UAVs) and machine learning</article-title>. <source>Measurement</source> <volume>164</volume>, <fpage>108048</fpage>. <pub-id pub-id-type="doi">10.1016/j.measurement.2020.108048</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Prasanna</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Dana</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>Gucunski</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Basily</surname>
<given-names>B. B.</given-names>
</name>
<name>
<surname>La</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Lim</surname>
<given-names>R. S.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Automated crack detection on concrete bridges</article-title>. <source>Ieee Trans. Automation Sci. Eng.</source> <volume>13</volume> (<issue>2</issue>), <fpage>591</fpage>&#x2013;<lpage>599</lpage>. <pub-id pub-id-type="doi">10.1109/tase.2014.2354314</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saidin</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Jamadin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Abdul Kudus</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mohd Amin</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Anuar</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>An overview: the application of vibration-based techniques in bridge structural health monitoring</article-title>. <source>Int. J. Concr. Struct. Mater.</source> <volume>16</volume> (<issue>1</issue>), <fpage>69</fpage>. <pub-id pub-id-type="doi">10.1186/s40069-022-00557-1</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sanchez-Cuevas</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Ramon-Soria</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Arrue</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ollero</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Heredia</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Robotic system for inspection by contact of bridge beams using UAVs</article-title>. <source>Sensors</source> <volume>19</volume> (<issue>2</issue>), <fpage>305</fpage>. <pub-id pub-id-type="doi">10.3390/s19020305</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Seo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Duque</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wacker</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Drone-enabled bridge inspection methodology and application</article-title>. <source>Automation Constr.</source> <volume>94</volume>, <fpage>112</fpage>&#x2013;<lpage>126</lpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2018.06.006</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Shafi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Assad</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Exploring the relationship between learning rate, batch size, and epochs in deep learning: an experimental study</article-title>,&#x201d; in <source>Soft computing for problem solving</source> (<publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer Nature Singapore</publisher-name>).</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>Y. J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>M. P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Autonomous crack and bughole detection for concrete surface image based on deep learning</article-title>. <source>Ieee Access</source> <volume>9</volume>, <fpage>85709</fpage>&#x2013;<lpage>85720</lpage>. <pub-id pub-id-type="doi">10.1109/access.2021.3088292</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Teng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z. C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X. D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Improved YOLOv3-based bridge surface defect detection by combining high- and low-resolution feature images</article-title>. <source>Buildings</source> <volume>12</volume> (<issue>8</issue>), <fpage>1225</fpage>. <pub-id pub-id-type="doi">10.3390/buildings12081225</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tomiczek</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Whitley</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Bridge</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Ifju</surname>
<given-names>P. G.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Bridge inspections with small unmanned aircraft systems: case studies</article-title>. <source>J. Bridge Eng.</source> <volume>24</volume> (<issue>4</issue>). <pub-id pub-id-type="doi">10.1061/(asce)be.1943-5592.0001376</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>C. Y.</given-names>
</name>
<name>
<surname>Bochkovskiy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>H. Y. M.</given-names>
</name>
</person-group> (<year>2023a</year>). &#x201c;<article-title>YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Vancouver, CANADA</conf-loc>.</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>H.-F.</given-names>
</name>
<name>
<surname>Zhai</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>L.-M.</given-names>
</name>
<name>
<surname>Mu</surname>
<given-names>K.-N.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.-p.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Measurement for cracks at the bottom of bridges based on tethered creeping unmanned aerial vehicle</article-title>. <source>Automation Constr.</source> <volume>119</volume>, <fpage>103330</fpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2020.103330</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.-J.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>H.-B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.-H.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>S.-Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.-Q.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>Adaptive learning rate optimization algorithms with dynamic bound based on Barzilai-Borwein method</article-title>. <source>Inf. Sci.</source> <volume>634</volume>, <fpage>42</fpage>&#x2013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2023.03.050</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y. J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Instance-level recognition and quantification for concrete surface bughole based on deep learning</article-title>. <source>Automation Constr.</source> <volume>107</volume>, <fpage>102920</fpage>. <pub-id pub-id-type="doi">10.1016/j.autcon.2019.102920</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Group normalization</source>.</citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>H. K.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S. C.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Research of image segmentation algorithm applied to concrete bridge cracks</article-title>,&#x201d; in <conf-name>Proceedings of the International Conference on Information Science and Technology (ICIST)</conf-name>, <conf-loc>Yangzhou, China</conf-loc>, <conf-date>March 2013</conf-date>.</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xing</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tao</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Mao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>Physics guided wavelet convolutional neural network for wind-induced vibration modeling with application to structural dynamic reliability analysis</article-title>. <source>Eng. Struct.</source> <volume>297</volume>, <fpage>117027</fpage>. <pub-id pub-id-type="doi">10.1016/j.engstruct.2023.117027</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>Crack detection of bridge concrete components based on large-scene images using an unmanned aerial vehicle</article-title>. <source>Sensors (Basel)</source> <volume>23</volume> (<issue>14</issue>), <fpage>6271</fpage>. <pub-id pub-id-type="doi">10.3390/s23146271</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Xuhang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Jie</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yun</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhouping</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>A new image-based method for concrete bridge bottom crack detection</article-title>,&#x201d; in <conf-name>Proceedings of the 2011 International Conference on Image Analysis and Signal Processing</conf-name>, <conf-loc>Hubei</conf-loc>, <conf-date>October 2011</conf-date>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2022b</year>). <article-title>Intelligent identification and detection method of prefabricated laminated slab</article-title>. <source>J. Civ. Environ. Eng.</source> <volume>44</volume> (<issue>01</issue>), <fpage>87</fpage>&#x2013;<lpage>93</lpage>. <pub-id pub-id-type="doi">10.11835/j.issn.2096-6717.2020.187</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y. Y.</given-names>
</name>
<name>
<surname>Ling</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R. Q.</given-names>
</name>
</person-group> (<year>2022d</year>). <article-title>Damage identification of frame structure based on approximate metropolis&#x2013;hastings algorithm and probability density evolution method</article-title>. <source>Int. J. Struct. Stab. Dyn.</source>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y. H.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>H. K.</given-names>
</name>
<name>
<surname>Chai</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022c</year>). <article-title>Fundamental mode shape estimation and element stiffness evaluation of girder bridges by using passing tractor-trailers</article-title>. <source>Mech. Sys. Sig. Pr.</source> <volume>169</volume>.</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y. H.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>Mode shape identification and damage detection of bridge by movable sensory system</article-title>. <source>IEEE Trans. Intell. Transp. Syst.</source> <volume>24</volume>, <fpage>1299</fpage>&#x2013;<lpage>1313</lpage>.</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y. Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tan</surname>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Bridge bearing damage identification based on statistical moment change rate</article-title>. <source>Mech. Sys. Sig. Pr.</source> <volume>206</volume>, <fpage>110898</fpage>.</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y. Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Xian</surname>
<given-names>H. K.</given-names>
</name>
<name>
<surname>Chai</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
</name>
<etal/>
</person-group> (<year>2023a</year>). <article-title>Baseline-free detection method for change of lateral stiffness of high-rise building based on statistical moment curvature</article-title>. <source>Struct. Control. Health Monit.</source> <fpage>4373174</fpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Post-processing of high formwork monitoring data based on the back propagation neural networks model and the autoregressive&#x2014;moving-average model</article-title>. <source>Symmetry</source> <volume>13</volume> (<issue>8</issue>), <fpage>1543</fpage>. <pub-id pub-id-type="doi">10.3390/sym13081543</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Ni</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>W. T.</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022a</year>). <article-title>Three-stage pavement crack localization and segmentation algorithm based on digital image processing and deep learning techniques</article-title>. <source>Sensors</source> <volume>22</volume> (<issue>21</issue>), <fpage>8459</fpage>. <pub-id pub-id-type="doi">10.3390/s22218459</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>An improved multi-objective optimization and decision-making method on construction sites layout of prefabricated buildings</article-title>. <source>Sustainability</source> <volume>15</volume> (<issue>7</issue>), <fpage>6279</fpage>. <pub-id pub-id-type="doi">10.3390/su15076279</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023c</year>). <article-title>Research progress of SHM system for super high-rise buildings based on wireless sensor network and cloud platform</article-title>. <source>Remote Sens.</source> <volume>15</volume> (<issue>6</issue>), <fpage>1473</fpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>Multi-volume variable scale bitmap data object classification algorithm architectural concrete color difference detection</article-title>. <source>J. Intelligent Constr.</source> <volume>1</volume> (<issue>2</issue>), <fpage>9180010</fpage>. <pub-id pub-id-type="doi">10.26599/jic.2023.9180010</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>W. T.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y. J.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Chromatic aberration identification of fair-faced concrete research based on multi-scale lightweight structured data algorithm</article-title>. <source>Front. Mater.</source> <volume>9</volume>. <pub-id pub-id-type="doi">10.3389/fmats.2022.851555</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y. J.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>M. P.</given-names>
</name>
<name>
<surname>Lv</surname>
<given-names>X. N.</given-names>
</name>
</person-group> (<year>2021b</year>). <article-title>A real-time detection method for concrete surface cracks based on improved YOLOv4</article-title>. <source>Symmetry-Basel</source> <volume>13</volume> (<issue>9</issue>), <fpage>1716</fpage>. <pub-id pub-id-type="doi">10.3390/sym13091716</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y. J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2021a</year>). <article-title>Lightweight neural network for real-time crack detection on concrete surface in fog</article-title>. <source>Front. Mater.</source> <volume>8</volume>. <pub-id pub-id-type="doi">10.3389/fmats.2021.798726</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Deep-learning-based bughole detection for concrete surface image</article-title>. <source>Adv. Civ. Eng.</source> <volume>2019</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1155/2019/8582963</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>C. B.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Jamshidi</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020b</year>). <article-title>Concrete bridge surface damage detection using a single-stage detector</article-title>. <source>Computer-Aided Civ. Infrastructure Eng.</source> <volume>35</volume> (<issue>4</issue>), <fpage>389</fpage>&#x2013;<lpage>409</lpage>. <pub-id pub-id-type="doi">10.1111/mice.12500</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Qian</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Automated bridge surface crack detection and segmentation using computer vision-based deep learning model</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>115</volume>, <fpage>105225</fpage>. <pub-id pub-id-type="doi">10.1016/j.engappai.2022.105225</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Qian</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>Automated bridge crack detection method based on lightweight vision models</article-title>. <source>Complex and Intelligent Syst.</source> <volume>9</volume> (<issue>2</issue>), <fpage>1639</fpage>&#x2013;<lpage>1652</lpage>. <pub-id pub-id-type="doi">10.1007/s40747-022-00876-6</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y. P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>YOLOv7-RAR for urban vehicle detection</article-title>. <source>Sensors</source> <volume>23</volume> (<issue>4</issue>), <fpage>1801</fpage>. <pub-id pub-id-type="doi">10.3390/s23041801</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y. X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>F. H.</given-names>
</name>
</person-group> (<year>2020a</year>). <article-title>On bridge surface crack detection based on an improved YOLO v3 algorithm</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>1626</volume> (<issue>1</issue>), <fpage>012151</fpage>.</citation>
</ref>
</ref-list>
</back>
</article>