<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2023.1280305</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A Wasserstein generative adversarial network with gradient penalty for active sonar signal reverberation suppression</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wang</surname><given-names>Zhen</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhang</surname><given-names>Hao</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2325582"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Huang</surname><given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2008456"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname><given-names>Xiao</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2205671"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tang</surname><given-names>Ning</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>An</surname><given-names>Yuan</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2431783"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Faculty of Information Science and Engineering, Ocean University of China</institution>, <addr-line>Qingdao</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Open Studio for Marine High Frequency Communications, Pilot National Laboratory for Marine Science and Technology</institution>, <addr-line>Qingdao</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Haixin Sun, Xiamen University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Xuebo Zhang, Northwest Normal University, China; Mingzhang Zhou, Xiamen University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Hao Zhang, <email xlink:href="mailto:zhanghao@ouc.edu.cn">zhanghao@ouc.edu.cn</email>; Wei Huang, <email xlink:href="mailto:hw@ouc.edu.cn">hw@ouc.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>10</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>10</volume>
<elocation-id>1280305</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>08</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>09</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Wang, Zhang, Huang, Chen, Tang and An</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Wang, Zhang, Huang, Chen, Tang and An</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Reverberation is the primary background interference for active sonar systems in shallow water environments, affecting the accuracy of target position detection. Reverberation suppression is a signal processing technique used to improve the clarity and accuracy of received signals by eliminating the echoes, reverberation, and noise introduced during underwater propagation. Existing reverberation suppression methods include algorithms based on time-frequency domain processing, noise reduction, adaptive filtering, and spectral subtraction, but their performance in high-reverberation environments (e.g., weak echoes from small targets) still does not meet the requirements of target detection. To address high-reverberation environments, we propose a reverberation suppression method based on a Wasserstein generative adversarial network with gradient penalty (RSWGAN-GP). The reverberation suppression generation network uses a one-dimensional convolutional structure to process normalized time-domain signals and achieves signal reconstruction through an encoder-decoder. The proposed method is verified on data collected during sea trials. Comparative results show that RSWGAN-GP effectively suppresses reverberation in observation signals with multiple bright spots, improving the signal-to-reverberation ratio by approximately 10 dB compared with other state-of-the-art algorithms and enhancing the information analysis and feature extraction capabilities of active sonar signals.</p>
</abstract>
<kwd-group>
<kwd>active sonar signal</kwd>
<kwd>reverberation suppression</kwd>
<kwd>generative adversarial network (GAN)</kwd>
<kwd>high reverberation environment</kwd>
<kwd>one-dimensional convolution</kwd>
</kwd-group>
<counts>
<fig-count count="14"/>
<table-count count="4"/>
<equation-count count="24"/>
<ref-count count="58"/>
<page-count count="19"/>
<word-count count="10572"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Ocean Observation</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Sonar can accurately detect fixed targets and determine target distance, but it has poor concealment and limited range and is susceptible to reverberation interference. The detection signal emitted by active sonar encounters obstacles or targets along its underwater propagation path and is affected by reflection, refraction, and scattering, resulting in signal delay and overlap. The signals subsequently received at the receivers form echo signals contaminated by reverberation (<xref ref-type="bibr" rid="B19">Huang and Wang, 2019</xref>). The influence of reverberation on active sonar systems such as synthetic aperture sonar (<xref ref-type="bibr" rid="B54">Zhang et&#xa0;al., 2023a</xref>; <xref ref-type="bibr" rid="B55">Zhang et&#xa0;al., 2023b</xref>), communication sonar (<xref ref-type="bibr" rid="B49">Yu et&#xa0;al., 2018</xref>), multibeam sonar (<xref ref-type="bibr" rid="B30">Neasham et&#xa0;al., 2007</xref>), and other active sonars therefore deserves a thorough review. Underwater interface reverberation is an important factor limiting the detection performance of active sonar in shallow water environments. Unlike noise interference, reverberation has non-stationary statistical characteristics and is usually mixed with the target echo, making the two challenging to distinguish (<xref ref-type="bibr" rid="B12">Faure, 1964</xref>).</p>
<p>Previous works on reverberation in signal processing mainly focus on detectors under specific reverberation conditions (<xref ref-type="bibr" rid="B3">Bharathi and Mohanty, 2019</xref>) and on the spatiotemporal distribution characteristics of reverberation; researchers have studied robust detection performance under various reverberation distribution conditions. Some researchers have attempted to reduce the effect of reverberation on the target echo by designing the transmission waveform, such as frequency-hopping signals encoded with particular frequencies (<xref ref-type="bibr" rid="B8">Costas, 1984</xref>), Q-function sonar signals (<xref ref-type="bibr" rid="B9">Cox and Lai, 1994</xref>), and SFM signals (<xref ref-type="bibr" rid="B41">Ward, 2001</xref>). However, a sufficiently high frequency is needed to achieve the reverberation suppression effect, which leads to low utilization of the low-frequency band and affects ranging accuracy.</p>
<p>Other researchers have studied anti-reverberation processing of the signals received by sonar. Marine reverberation has a strong temporal correlation with target echo signals, so target echoes cannot be effectively found by conventional matched filtering: the two spectra overlap in the frequency domain. To improve the performance of coherent processing in reverberation, Kay et&#xa0;al. used an AR pre-whitening method that, under certain conditions, filters out reverberation as if it were white noise (<xref ref-type="bibr" rid="B22">Kay and Salisbury, 1990</xref>). Wu et&#xa0;al. obtained higher gain and more effectively detected target echoes through matched filtering (<xref ref-type="bibr" rid="B45">Wu et&#xa0;al., 2018</xref>), but local stationarity of the reverberation is required as a premise (<xref ref-type="bibr" rid="B43">Widrow et&#xa0;al., 1967</xref>), an assumption widely used in the adaptive line enhancement (ALE) algorithm (<xref ref-type="bibr" rid="B27">Ma et&#xa0;al., 2021</xref>); this, however, places strict requirements on the channel environment. H.M. Ozaktas and L.B. Almeida filtered the signal based on the time-frequency focus difference between the echo and the reverberation in the fractional Fourier transform domain to achieve reverberation suppression (<xref ref-type="bibr" rid="B31">Ozaktas et&#xa0;al., 1996</xref>; <xref ref-type="bibr" rid="B53">Zhang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B28">Mejjaoli and Omri, 2020</xref>). The LMS algorithm performs adaptive filtering based on the error between the input and output of the channel, which likewise imposes strict requirements on the channel environment. Freburger et&#xa0;al. used the principal component inversion algorithm to project the received signal into two subspaces based on the power difference between different backgrounds, thereby achieving reverberation separation (<xref ref-type="bibr" rid="B13">Freburger and Tufts, 1997</xref>); however, when the power of the target echo is similar to that of the reverberation, distinguishing between the two becomes difficult.</p>
<p>With the development of artificial intelligence (AI) technology, deep neural networks have brought new research ideas for solving the shallow-sea sonar reverberation problem. As a hot research direction in machine learning, the GAN (<xref ref-type="bibr" rid="B2">Ashraf et&#xa0;al., 2021</xref>) has become a popular deep learning model owing to its ability to generate high-quality samples, learn from unlabeled data, and support multi-modal data (<xref ref-type="bibr" rid="B51">Zhan et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B11">Dong and Yang, 2018</xref>). Recently, GANs have evolved from image generation to reverberation data generation (<xref ref-type="bibr" rid="B18">Hu et&#xa0;al., 2023</xref>). GANs have also been used to generate room impulse responses (RIRs), with the aim of augmenting existing real RIRs with high-quality generated ones (<xref ref-type="bibr" rid="B33">Ratnarajah et&#xa0;al., 2023</xref>). In the field of underwater acoustic engineering, it is therefore theoretically feasible to use a GAN to suppress active sonar reverberation in high-reverberation environments.</p>
<p>This paper proposes a Wasserstein generative adversarial network model with a gradient penalty (RSWGAN-GP) to solve reverberation suppression of sonar signals. Sonar signals differ from images, so a one-dimensional convolutional structure is built in this paper to process the signal data. The generator of the adversarial network follows the U-net architecture (<xref ref-type="bibr" rid="B34">Ronneberger et&#xa0;al., 2015</xref>): the encoder encodes the original reverberant signal data, and the decoder generates the reverberation-suppressed signal data. The discriminator adopts the design idea of SkipNet (<xref ref-type="bibr" rid="B1">Abrahamyan et&#xa0;al., 2021</xref>) to achieve a balance between speed and accuracy. To realize fast and accurate training of the adversarial network, the structure design comprehensively follows WGAN-GP. In the experiments, simulation is used to supplement the data set, addressing the difficulty of sampling marine experimental data and the resulting insufficiency of the data set. Our main contributions are summarized as follows:</p>
<list list-type="simple">
<list-item>
<p>1) In order to solve the difficulty of feature extraction in a reverberation environment, we propose a Wasserstein generative adversarial network model with a gradient penalty method.</p>
</list-item>
<list-item>
<p>2) Underwater active sonar reverberation simulation with echo targets is proposed to obtain many marine reverberation signals, solving the problem of insufficient training sets.</p>
</list-item>
</list>
<p>The rest of the article is organized as follows. Section 2 starts with a brief review of related works. Section 3 presents preliminary data processing, including RSWGAN-GP reverberation data generation, signal time-varying gain control, and automatic gain control. In Section 4, the reverberation suppression method based on RSWGAN-GP is proposed, and the generation network, discrimination network, and error loss are explained respectively. In Section 5, experiments are given to verify the effectiveness of the method, and conclusions are given in Section 6. In active sonar, commonly used detection signals include the single continuous wave (CW), LFM, NLFM, and BPSK signals. In this paper, the typical CW signal is selected as the research object for active sonar signal feature enhancement.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related works</title>
<sec id="s2_1">
<label>2.1</label>
<title>Reverberation suppression methods</title>
<p>Traditional methods for reverberation suppression typically focus on mapping the feature subspace. The differences between reverberation and target echoes have been investigated in the Doppler, discrete wavelet, and fractional Fourier domains. Previous research provides essential features for reducing reverberation and aiding target detection. For moving targets in particular, target tracking can achieve reverberation suppression. However, these methods can be limited in low signal-to-reverberation ratio (SRR) and high-reverberation scenes. When the echo of a small target is received, the echo is weak and masked by reverberation, and its feature subspace is correspondingly weak and difficult to find.</p>
<p>Low-rank and sparsity theories developed a decade ago have found wide applications in image processing for tasks such as background modeling, camera calibration, and optical character recognition (<xref ref-type="bibr" rid="B5">Chandrasekaran et&#xa0;al., 2011</xref>). They have also been introduced to underwater acoustic engineering for reverberation suppression and target detection (<xref ref-type="bibr" rid="B32">Qian and Cao, 2019</xref>). In the case of fixed-position active sonar, the received signal from multiple pings exhibits significant stationarity. Considering the echo data from a single ping as a frame, multiple frames can be constructed over time and decomposed into dynamic and steady components. The steady components display similar strength distributions over time and can be viewed as a low-rank matrix. On the other hand, the dynamic components, consisting of reverberation fluctuations and target echoes, can be treated as a sparse matrix.</p>
<p>Consequently, reverberation suppression methods based on low-rank and sparse matrix decomposition have been proposed. These include techniques such as non-negative matrix factorization, principal component analysis, and robust principal component analysis (RPCA) (<xref ref-type="bibr" rid="B4">Chalapathy et&#xa0;al., 2017</xref>). To make the factorization of large matrices tractable, accelerated solvers have been developed, such as the accelerated proximal gradient, augmented Lagrange multiplier, and alternating direction method of multipliers (ADMM) (<xref ref-type="bibr" rid="B26">LiXiukun et&#xa0;al., 2015</xref>). Zhu et&#xa0;al. applied low-rank and sparse matrix estimation to decompose received data, enhancing the robustness of reverberation suppression techniques (<xref ref-type="bibr" rid="B57">Zhu et&#xa0;al., 2022</xref>).</p>
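<p>As a concrete illustration of this decomposition, the following minimal Python sketch implements RPCA via the inexact augmented Lagrange multiplier method; the parameter defaults are common heuristics from the RPCA literature rather than values taken from the cited works.</p>
<preformat>
import numpy as np

def rpca(M, lam=None, mu=None, tol=1e-6, max_iter=500):
    """Robust PCA: decompose M = L + S with L low-rank (stable
    reverberation background) and S sparse (fluctuations and echoes)."""
    m, n = M.shape
    if lam is None:
        lam = 1.0 / np.sqrt(max(m, n))        # standard RPCA weight
    if mu is None:
        mu = m * n / (4.0 * np.abs(M).sum())  # common step-size heuristic
    shrink = lambda X, t: np.sign(X) * np.maximum(np.abs(X) - t, 0.0)
    Y = np.zeros_like(M)
    S = np.zeros_like(M)
    for _ in range(max_iter):
        # low-rank update: singular-value thresholding
        U, sig, Vt = np.linalg.svd(M - S + Y / mu, full_matrices=False)
        L = (U * shrink(sig, 1.0 / mu)) @ Vt
        # sparse update: element-wise soft thresholding
        S = shrink(M - L + Y / mu, lam / mu)
        R = M - L - S                         # residual
        Y += mu * R                           # dual ascent step
        if np.linalg.norm(R) &lt;= tol * np.linalg.norm(M):
            break
    return L, S
</preformat>
<p>Applied to a matrix whose rows are successive pings, <italic>L</italic> captures the stable reverberation background and <italic>S</italic> retains the fluctuations and candidate target echoes.</p>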
<p>These methods can suppress reverberation to a certain extent. However, their performance remains limited at low signal-to-reverberation ratios.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Application of artificial intelligence in reverberation suppression</title>
<p>With the development and rise of artificial intelligence in recent years, algorithms combining artificial intelligence with anti-reverberation technology continue to emerge, such as support vector machines, CNNs (<xref ref-type="bibr" rid="B36">Song et&#xa0;al., 2019</xref>), RNNs (<xref ref-type="bibr" rid="B6">Chen et&#xa0;al., 2022</xref>), and GANs. In the beginning, AI was simply an addition to machine learning. For example, Zhu et&#xa0;al. designed an SVM with a feature kernel function based on the non-Gaussian difference between reverberation and target echo to detect signals against a reverberation background. This method improves recognition quality in reverberant backgrounds, and its effect is better than that of the adaptive filtering algorithm (<xref ref-type="bibr" rid="B44">Wu et&#xa0;al., 2008</xref>). Jiang Keyu et&#xa0;al. processed lake test data (<xref ref-type="bibr" rid="B21">Jiang et&#xa0;al., 2007</xref>) and used an RBF neural network to better detect target echoes in reverberation. Xiang et&#xa0;al. proposed a reverberation suppression method for underwater moving target detection based on a robust autoencoder (<xref ref-type="bibr" rid="B58">Zhu and Sun, 2008</xref>). Xiao et&#xa0;al. proposed an ABNN focusing on the frequency-domain characteristics of the target, which suppresses environmental noise and ship interference and yields higher accuracy in target detection and recognition (<xref ref-type="bibr" rid="B47">Xiao et&#xa0;al., 2021</xref>).</p>
<p>With the continuous development and innovation of deep learning technology, many neural network architectures with good performance and robust stability have emerged. For instance, multilayer perceptrons (MLPs) and long short-term memory (LSTM) networks have been developed to learn mappings from a window of reverberated frames (or &#x201c;context&#x201d; windows) to a source frame, thus learning to dereverberate by inverse transformation (<xref ref-type="bibr" rid="B15">Han et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B39">Wang et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B46">Wuth et&#xa0;al., 2020</xref>). Additionally, <xref ref-type="bibr" rid="B56">Zhao et&#xa0;al. (2018)</xref> proposed an LSTM-based late reverberation suppression strategy that learns the difference between the source and reverberated signals; dereverberation is then performed by subtracting the late reverberation estimate from the observed reverberated signal.</p>
<p>The application of deep learning provides another effective route to reverberation suppression. Artificial intelligence offers relatively excellent performance and can achieve effects that traditional methods cannot, which is why anti-reverberation research in recent years has been biased towards artificial intelligence.</p>
<p>The above studies show that combining deep learning with sonar signal reverberation suppression is feasible. However, reverberation suppression still needs to be improved in high-reverberation environments and under different underwater signal conditions. At the same time, the ability to extract effective information from the signal still cannot meet the needs of complex environments.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>The relationship between artificial intelligence methods and traditional methods</title>
<p>In underwater reverberation suppression, artificial intelligence (AI) and machine learning methods were initially introduced to make up for the shortcomings of traditional methods and to complement and combine with them. A more recent study found that artificial intelligence can complete the task well enough to replace traditional methods entirely (<xref ref-type="bibr" rid="B24">Koh et&#xa0;al., 2020</xref>).</p>
<p>Traditional methods are mainly based on signal processing and digital filtering techniques, which involve preprocessing, filtering, and noise reduction operations to suppress reverberation in underwater sound signals (<xref ref-type="bibr" rid="B35">Singer et&#xa0;al., 2009</xref>). These methods often rely on domain knowledge and expertise to analyze and model the reverberation characteristics, followed by the design of corresponding algorithms for processing. While traditional methods can reduce the impact of underwater reverberation, their effectiveness is limited when dealing with complex reverberation environments and signals.</p>
<p>AI methods, on the other hand, utilize machine learning and deep learning techniques to learn and recognize reverberation features and perform suppression automatically (<xref ref-type="bibr" rid="B16">Hao et&#xa0;al., 2023</xref>). By training models with large amounts of data, AI methods can acquire stronger generalization and adaptability, making them capable of handling more complex underwater reverberation environments and signals. Compared with traditional methods, AI methods exhibit higher levels of automation and intelligence in underwater reverberation suppression.</p>
<p>Traditional methods and AI methods can be combined in the context of underwater reverberation suppression. Traditional methods can provide basic processing techniques and approaches for preprocessing and initial reverberation suppression, which AI methods can further optimize and enhance (<xref ref-type="bibr" rid="B48">Yin et&#xa0;al., 2023</xref>). For instance, traditional methods can be used for filtering and noise reduction of underwater sound signals, and the processed signals can be used as training data for training AI models to achieve better reverberation suppression.</p>
<p>In the current research, some scholars have found that traditional methods and artificial intelligence methods are complementary in terms of underwater reverberation suppression and can be combined. Other scholars have used AI alone to replace traditional methods and improve reverberation suppression (<xref ref-type="bibr" rid="B42">Weiss et&#xa0;al., 2023</xref>).</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Preliminary</title>
<p>After the signal is received, it is processed by the active sonar system. In this part, the hardware implementation of that processing is reproduced by the following algorithms. The signals used to generate the training set are processed in the same way.</p>
<p>The active sonar device processes the received signal as follows. After the hardware receives the sonar signal, it undergoes time-varying gain and automatic gain control processing so that the power of long-distance echo signals is strengthened and more convenient for subsequent processing. After this processing, the generated training set is closer to the actual data. <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref> shows the signal state at each stage.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Signal processing. <bold>(A) </bold>The echo received by the active sonar system. <bold>(B)</bold> The echo signal is processed by TVG. <bold>(C)</bold> The echo signal is processed by AGC. <bold>(D)</bold> The echo signal received and processed by sonar. <bold>(E)</bold> Target echo signal hidden in sonar signal.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g001.tif"/>
</fig>
<p>The processing of the signal in the hardware device after reception is shown in <xref ref-type="fig" rid="f1"><bold>Figures&#xa0;1B&#x2013;D</bold></xref> and is explained below.</p>
<p>The echo received by the active sonar system, together with the generated echo signal containing target information from Section 4.1, is shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1A</bold></xref>; the circled position is the target echo. The first processing stage is time-variable gain (TVG) (<xref ref-type="bibr" rid="B20">Innami and Kasai, 2012</xref>). According to the sonar equation, the echo margin of the sonar is determined by the difference between the echo signal level and the background interference level:</p>
<disp-formula>
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>T</mml:mi>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>T</mml:mi>
<mml:mi>L</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In Eq. (1), <italic>DT</italic> represents the detection threshold, i.e., the echo signal strength the sonar must receive; <italic>TL</italic> represents the one-way propagation loss, and because active sonar is bidirectional, 2<italic>TL</italic> defines the complete propagation loss; <italic>SL</italic> represents the sound source level; <italic>NL</italic> represents the noise level; <italic>DI</italic> represents the directivity index; and <italic>TS</italic> represents the target strength. For most sonar systems, including multibeam sonar, the propagation loss (TL) is compensated by the TVG device inside the receiver. The ideal TVG curve should follow the expected sonar propagation loss, i.e.,</p>
<disp-formula>
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>L</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>10</mml:mn>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mi>r</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In Eq. (2), <italic>r</italic> represents the action distance, and <italic>a</italic> is the loss factor, which is a function of frequency. <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1B</bold></xref> shows the signal after the TVG: the distant signal is no longer attenuated as the distance increases.</p>
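<p>For illustration, the following minimal Python sketch applies the ideal TVG curve of Eq. (2) to a received time series; the sampling rate, sound speed, and loss factor values are illustrative assumptions rather than parameters of the actual system.</p>
<preformat>
import numpy as np

def apply_tvg(x, fs, c=1500.0, a=0.05):
    """Compensate the two-way propagation loss of Eq. (2),
    TL = 10*log10(r) + a*r, for a signal x sampled at fs (Hz).
    c is the sound speed (m/s); a is the loss factor (dB/m)."""
    t = np.arange(len(x)) / fs
    r = np.maximum(c * t / 2.0, 1e-3)    # two-way travel time -> range (m)
    tl_db = 10.0 * np.log10(r) + a * r   # one-way loss in dB
    gain = 10.0 ** (2.0 * tl_db / 20.0)  # active sonar is two-way: 2*TL
    return x * gain
</preformat>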
<p>Signal automatic gain control (AGC) (<xref ref-type="bibr" rid="B52">Zhang et&#xa0;al., 2017</xref>) is applied next. In practice, automatic gain control is typically implemented through circuit design; however, AGC must also be implemented for the experimental simulation of the sonar data set. The gain must be adjusted automatically based on the input and output signal levels, which naively requires numerous logarithmic operations. To avoid these operations, a simple comparison against a gain lookup table is used instead. The algorithm can be described as follows.</p>
<disp-formula>
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mn>20</mml:mn>
<mml:mi>l</mml:mi>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>20</mml:mn>
<mml:mi>l</mml:mi>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>10</mml:mn>
<mml:mi>l</mml:mi>
<mml:mi>g</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mo stretchy="false">)</mml:mo>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>For clarity, the left side of Eq. (3) expresses the adjustment between two adjacent gain coefficients <italic>G</italic>(<italic>n</italic> + 1) and <italic>G</italic>(<italic>n</italic>) in dB. After the average power of the output signal is estimated, it is compared with the reference power, and the gain coefficient is adjusted according to the outcome of this comparison. In <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1C</bold></xref>, the AGC equalizes the signal strength and partially suppresses reverberation. Hardware limitations must also be considered in practical usage: the hardware clips the received signal, and the simulation reproduces how the part of the signal exceeding the limit would appear, as illustrated in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1D</bold></xref>. <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1E</bold></xref> displays the desired output of the network, which is used to calculate the Jensen-Shannon divergence (JSD), aiming to bring the network output closer to it.</p>
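<p>A minimal Python sketch of this block-wise AGC logic is shown below; the block length, reference power, and the thresholds and dB steps in the gain lookup table are illustrative assumptions.</p>
<preformat>
import numpy as np

def agc(x, ref_power=1.0, block=256):
    """Block-wise AGC per Eq. (3): estimate the average output power,
    compare it with the reference power, and step the gain using a
    small lookup table instead of per-sample logarithms."""
    g = 1.0
    y = np.empty(len(x))
    for s in range(0, len(x), block):
        seg = x[s:s + block] * g
        y[s:s + block] = seg
        ratio = np.mean(seg ** 2) / ref_power  # output power vs. reference
        if ratio >= 4.0:
            step_db = -3.0        # far too loud: cut hard
        elif ratio >= 2.0:
            step_db = -1.0        # slightly loud: cut gently
        elif ratio >= 0.5:
            step_db = 0.0         # within tolerance: hold the gain
        elif ratio >= 0.25:
            step_db = 1.0         # slightly quiet: boost gently
        else:
            step_db = 3.0         # far too quiet: boost hard
        g *= 10.0 ** (step_db / 20.0)
    return y
</preformat>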
<p>In both experimental and simulation data, some signals exhibit peak clipping and cannot be restored to complete sine waves, making traditional methods ineffective. After the above (B), (C), and (D) processing steps and normalization, the simulated signal has inputs with the same dynamic range.</p>
</sec>
<sec id="s4">
<label>4</label>
<title>Reverberation suppression in RSWGAN-GP</title>
<p>In order to solve the problem of limited suppression effect in a high-reverberation environment, a reverberation suppression framework based on the generative adversarial network is proposed. A nonlinear mapping is established from the sonar detection signal with reverberation to the reverberation-suppressed sonar signal, where the input is the signal propagation time series and the output is the corresponding time series after reverberation suppression. Owing to the scarcity of real reverberation signal data, the training of RSWGAN-GP would face overfitting, reducing its generalization performance. To this end, virtual reverberation data are generated in this section by combining statistical modeling with a multi-highlight model. The training data set is expanded by mixing real and virtual reverberation data while ensuring the consistency of their distribution patterns. Then, the theoretical signal gain control is calculated from signal propagation theory to form the time series signals for model learning. The implementation of this part is shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Reverberation suppression RSWGAN-GP construction.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g002.tif"/>
</fig>
<p>The data generated in Section 4.1 are mixed with real data for Sections 4.3 and 4.4, and the gradient penalty part is invoked for training.</p>
<sec id="s4_1">
<label>4.1</label>
<title>Underwater active sonar reverberation simulation</title>
<p>Underwater reverberation consists of volume reverberation, surface reverberation, and submarine reverberation. Sea surface reverberation and submarine reverberation are collectively referred to as interface reverberation.</p>
<p>To model the network, a significant amount of experimental data is necessary. Therefore, in this paper, we will simulate sonar reverberation data. The reverberation simulation comprises three parts: the generation of reverberation, the simulation of echoes, and environmental noise simulation.</p>
<p>As shown in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>, an omnidirectional signal is emitted from point <italic>M</italic>. Point <italic>M</italic> represents the transducer, and the distance from <italic>M</italic> to the interface <italic>xoy</italic> is <italic>h</italic>. At time <italic>t</italic>, the scatterers contributing to the reverberation lie inside the ring (<xref ref-type="bibr" rid="B38">Sun et&#xa0;al., 2010</xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Reverberation model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g003.tif"/>
</fig>
<p>It is assumed that the number of scatterers generating reverberation on the <italic>i</italic>-th ring is <inline-formula>
<mml:math display="inline" id="im1">
<mml:mi>Q</mml:mi>
</mml:math>
</inline-formula>. The emitted signal is denoted as <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and its strength is <italic>A</italic>. The length of <italic>MA</italic> is represented by <italic>r</italic>, and the wave number is <italic>k</italic>, where <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
<mml:mi>f</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The speed of sound in the ocean is represented as <italic>c</italic>, and the scattering coefficient of the <italic>q</italic>-th scatterer is <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:msub>
<mml:mi>&#x3c8;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. Here, <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>B</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, the ring area <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
<mml:msup>
<mml:mi>B</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. If the unit area is <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mo>&#x25b3;</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, then the number of scatterers contributing to the reverberation in the <italic>i</italic>-th ring is <italic>N</italic>, <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mo>&#x25b3;</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the rounding operation, <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. Therefore, the scattering characteristic function at time <italic>t</italic> can be expressed as:</p>
<disp-formula>
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>Q</mml:mi>
</mml:munderover>
<mml:mfrac>
<mml:mi>A</mml:mi>
<mml:mi>r</mml:mi>
</mml:mfrac>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>r</mml:mi>
</mml:mfrac>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mtext>&#x2003;&#x2003;</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>Q</mml:mi>
</mml:munderover>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>k</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:msub>
<mml:mi>&#x3c8;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im12">
<mml:mi>a</mml:mi>
</mml:math>
</inline-formula> represents the amplitude and <inline-formula>
<mml:math display="inline" id="im13">
<mml:mi>&#x3c8;</mml:mi>
</mml:math>
</inline-formula> represents the phase; <inline-formula>
<mml:math display="inline" id="im14">
<mml:mi>a</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im15">
<mml:mi>&#x3c8;</mml:mi>
</mml:math>
</inline-formula> are both random numbers following the Gaussian distribution, subject to the constraints <inline-formula>
<mml:math display="inline" id="im16">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im17">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>&#x3c8;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The process of generating reverberation involves convolving the emission signal with the scattering characteristic function. Therefore, the reverberation at time <italic>t</italic> can be expressed as the following formula:</p>
<disp-formula>
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2297;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
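<p>For illustration, the following Python sketch generates reverberation according to Eqs. (4)-(6): a complex scattering characteristic function is accumulated ring by ring and then convolved with the emitted signal. The number of scatterers per ring and the amplitude statistics are illustrative assumptions.</p>
<preformat>
import numpy as np

def scattering_function(fs, duration, f0, A=1.0, c=1500.0, Q=50, seed=0):
    """Discrete scattering characteristic function P(t) of Eqs. (4)-(5).
    Each time sample corresponds to a ring of Q scatterers at r = c*t/2,
    with random amplitudes a_iq in [0, 1] and phases psi_iq in [0, 2*pi)."""
    rng = np.random.default_rng(seed)
    n = int(fs * duration)
    t = np.arange(1, n + 1) / fs               # start at t > 0 so r > 0
    r = c * t / 2.0
    k = 2.0 * np.pi * f0 / c                   # wave number
    a = np.clip(rng.normal(0.5, 0.2, (n, Q)), 0.0, 1.0)
    psi = rng.uniform(0.0, 2.0 * np.pi, (n, Q))
    ring_sum = (a * np.exp(1j * psi)).sum(axis=1)
    return (A / r**2) * np.exp(-2j * k * r) * ring_sum

def reverberation(s, fs, duration, f0):
    """Eq. (6): reverberation R(t) = s(t) convolved with P(t)."""
    P = scattering_function(fs, duration, f0)
    return np.convolve(s, P.real)[: int(fs * duration)]
</preformat>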
<p>Active sonar detects underwater targets by capturing the target echo signal in the presence of reverberation. When the sonar signal hits an object, a new echo is generated through the superposition of multiple highlights on the target body and is received by the active sonar (<xref ref-type="bibr" rid="B17">Hodges, 2011</xref>).</p>
<p>In addition to reverberation noise, target echo is an essential component of active sonar signals. The sonar is assumed to have an array of <inline-formula>
<mml:math display="inline" id="im18">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x22ef;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mo>&#x22ef;</mml:mo>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> elements for reception. The target echo signal received by the <italic>m</italic>-th array element can be expressed as:</p>
<disp-formula>
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>Z</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>I</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3c8;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In Eq. (7), the coordinate of the <italic>m</italic>-th array element <inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is represented by a vector <inline-formula>
<mml:math display="inline" id="im20">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo>&#x2192;</mml:mo>
</mml:mover>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula>
<mml:math display="inline" id="im21">
<mml:mi>I</mml:mi>
</mml:math>
</inline-formula> represents the number of highlights of the target, <inline-formula>
<mml:math display="inline" id="im22">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the reflection coefficient of the <italic>i</italic>-th highlight, <inline-formula>
<mml:math display="inline" id="im23">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the envelope of the transmitted signal, <inline-formula>
<mml:math display="inline" id="im24">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the time delay experienced by the sound wave incident on the <italic>i</italic>-th highlight and then reflected back to the <italic>m</italic>-th array element, <inline-formula>
<mml:math display="inline" id="im25">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the angular frequency change of the sound wave irradiated to the <italic>i</italic>-th highlight and then reflected back to the <italic>m</italic>-th array element, <inline-formula>
<mml:math display="inline" id="im26">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the Doppler shift, and <inline-formula>
<mml:math display="inline" id="im27">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c8;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the random phase shift of the <italic>i</italic>-th highlight echo, uniformly distributed between <inline-formula>
<mml:math display="inline" id="im28">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>&#x223c;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Different objects can be simulated by adjusting the number of highlights.</p>
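<p>A simplified, single-array-element Python sketch of the multi-highlight echo model in Eq. (7) follows; it collapses the per-element angular-frequency term into a single carrier and treats the highlight reflection coefficients and delays as given, so it is an illustrative reading of the model rather than the exact implementation.</p>
<preformat>
import numpy as np

def target_echo(t, envelope, b, tau, f0, fd=0.0, seed=1):
    """Sum over highlights of delayed, scaled copies of the transmitted
    envelope S(t) with a Doppler-shifted carrier and a random highlight
    phase psi_i (simplified single-element form of Eq. 7)."""
    rng = np.random.default_rng(seed)
    echo = np.zeros_like(t)
    for b_i, tau_i in zip(b, tau):
        td = t - tau_i
        env = np.interp(td, t, envelope, left=0.0, right=0.0)  # S(t - tau)
        psi = rng.uniform(0.0, 2.0 * np.pi)                    # random phase
        echo += b_i * env * np.cos(2.0 * np.pi * (f0 + fd) * td - psi)
    return echo

# e.g. three highlights of one object:
# target_echo(t, env, b=[1.0, 0.6, 0.4], tau=[0.500, 0.502, 0.505], f0=10e3)
</preformat>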
<p>For environmental noise simulation, the spatial and physical characteristics of the marine environment are complex, and the noise level depends on the mixing of multiple noise sources. This paper adopts an AR modeling method to simulate and synthesize marine environmental noise (<xref ref-type="bibr" rid="B7">Chen et&#xa0;al., 2018</xref>). First, an uncorrelated Gaussian white noise sequence <italic>v</italic>(<italic>n</italic>) is generated; the marine environmental noise is then obtained by passing <italic>v</italic>(<italic>n</italic>) through an AR filter with a specific temporal correlation. Denoting the marine environmental noise as <italic>W</italic>(<italic>n</italic>), the generation process is given in Eq. (8):</p>
<disp-formula>
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>p</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>&#x3c5;</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>
<inline-formula>
<mml:math display="inline" id="im29">
<mml:mi>p</mml:mi>
</mml:math>
</inline-formula> represents the order of the AR filter, <inline-formula>
<mml:math display="inline" id="im30">
<mml:mrow>
<mml:mi>&#x3c5;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the Gaussian white noise sequence, and <inline-formula>
<mml:math display="inline" id="im31">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>&#x3c5;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mn>0</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represents the variance of the marine ambient noise. The AR filter coefficients <inline-formula>
<mml:math display="inline" id="im32">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im33">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> can be solved by the Levinson-Durbin method (<xref ref-type="bibr" rid="B10">Diniz et&#xa0;al., 2010</xref>).</p>
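<p>The noise synthesis of Eq. (8) amounts to passing white Gaussian noise through an all-pole filter, as in the short Python sketch below; the AR coefficients used here are illustrative placeholders for values that would be fitted with the Levinson-Durbin method.</p>
<preformat>
import numpy as np
from scipy.signal import lfilter

def ar_noise(n, a=(0.6, -0.25), sigma=1.0, b0=1.0, seed=0):
    """Eq. (8): W(n) = -sum_k a_p(k) W(n-k) + sigma*b0*v(n).
    lfilter's denominator [1, a_1, ..., a_p] realizes the recursion."""
    rng = np.random.default_rng(seed)
    v = rng.standard_normal(n)            # uncorrelated white noise v(n)
    return lfilter([sigma * b0], [1.0, *a], v)
</preformat>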
<p>The data generated by the above three equations are normalized separately and assigned corresponding weights. The resulting signal is denoted as <italic>S</italic>(<italic>t</italic>) and is shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1A</bold></xref>, where the yellow box marks the echo position of the target.</p>
<disp-formula>
<label>(9)</label>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:mtext>&#x2003;&#x2003;</mml:mtext>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>W</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2003;&#x2003;</mml:mtext>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Here, <inline-formula>
<mml:math display="inline" id="im34">
<mml:mi>&#x3b1;</mml:mi>
</mml:math>
</inline-formula> represents the trade-off between <inline-formula>
<mml:math display="inline" id="im35">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im36">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula>
<mml:math display="inline" id="im37">
<mml:mi>&#x3bb;</mml:mi>
</mml:math>
</inline-formula> is an adjustable parameter that controls the degree of strength fluctuation. By adjusting the parameters <inline-formula>
<mml:math display="inline" id="im38">
<mml:mi>&#x3b1;</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im39">
<mml:mi>&#x3bb;</mml:mi>
</mml:math>
</inline-formula>, diversity is introduced into the data set and the desired SIR (signal-to-interference ratio, <inline-formula>
<mml:math display="inline" id="im40">
<mml:mrow>
<mml:mn>10</mml:mn>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>&#x261;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>) and SINR (signal-to-interference-plus-noise ratio, <inline-formula>
<mml:math display="inline" id="im41">
<mml:mrow>
<mml:mn>10</mml:mn>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>&#x261;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">/</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>&#x3bb;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>) are achieved, where <inline-formula>
<mml:math display="inline" id="im42">
<mml:mi>P</mml:mi>
</mml:math>
</inline-formula> represents power.</p>
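<p>As a concrete illustration of Eq. (9), the following Python sketch mixes a reverberation component <italic>R</italic>(<italic>t</italic>), an echo <italic>E</italic>(<italic>t</italic>), and ambient noise <italic>W</italic> so that a prescribed SIR is met by solving the SIR definition above for <italic>&#x3b1;</italic>. The use of NumPy, the function name, and the white-noise stand-ins for the three components are illustrative assumptions, not the original implementation.</p>
<preformat>
import numpy as np

def mix_to_sir(R, E, W, sir_db, lam):
    """Combine reverberation R, echo E, and noise W as in Eq. (9), choosing
    alpha so that 10*log10(alpha**2 * P(E) / P(R)) equals sir_db."""
    P = lambda x: np.mean(x ** 2)          # average power
    alpha = np.sqrt(10 ** (sir_db / 10) * P(R) / P(E))
    alpha = min(alpha, 1.0)                # Eq. (9) restricts alpha to [0, 1]
    S = R + alpha * E + lam * W
    sinr_db = 10 * np.log10(alpha ** 2 * P(E) / (P(R) + lam ** 2 * P(W)))
    return S, alpha, sinr_db

# toy example with white-noise stand-ins for the three components
rng = np.random.default_rng(0)
R, E, W = rng.standard_normal((3, 25600))
S, alpha, sinr = mix_to_sir(R, E, W, sir_db=-5.0, lam=0.1)
print(f"alpha={alpha:.3f}, achieved SINR={sinr:.2f} dB")
</preformat>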
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Implementation mechanism of the generative adversarial network</title>
<p>A GAN is an effective data generation network consisting of a generator (G) and a discriminator (D). The adversarial training process is a minimax game in which the discriminator and generator finally reach a Nash equilibrium.</p>
<p>The adversarial training optimization process for generators and discriminators can be expressed as follows:</p>
<disp-formula>
<label>(10)</label>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mi>G</mml:mi>
</mml:munder>
<mml:mtext>&#xa0;</mml:mtext>
<mml:munder>
<mml:mrow>
<mml:mi>max</mml:mi>
</mml:mrow>
<mml:mi>D</mml:mi>
</mml:munder>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>&#x261;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In Eq. (10), <italic>x</italic> is the actual data, <inline-formula>
<mml:math display="inline" id="im43">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the actual data distribution, <inline-formula>
<mml:math display="inline" id="im44">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>&#x261;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the generated data <inline-formula>
<mml:math display="inline" id="im45">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>z</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> distribution. The objective function min<italic><sub>G</sub></italic> max<italic><sub>D</sub> V</italic>(<italic>G,D</italic>) aims to minimize the Jensen&#x2013;Shannon divergence (JSD) between the actual data distribution <italic>P<sub>r</sub></italic> and the generated data distribution, provided that <italic>D</italic> is optimal.</p>
<p>However, when the generated and actual data distributions have negligible overlap, the JSD cannot measure the distance between them. Training the GAN by optimizing the JSD then provides no meaningful optimization target, which leads to unstable training gradients and mode collapse.</p>
<p>To solve the problems mentioned above, the Wasserstein GAN (WGAN) uses the Wasserstein distance as the optimization objective for training GANs. To satisfy the Lipschitz continuity constraint, WGAN clips the weights to a specific range, but this weight clipping leads to poor generation results. WGAN-GP is a gradient penalty-based WGAN: it improves the Lipschitz continuity constraint by using a gradient penalty instead of the weight clipping in WGAN.</p>
<p>The objective function of WGAN-GP is:</p>
<disp-formula>
<label>(11)</label>
<mml:math display="block" id="M11">
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mi>max</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mi>D</mml:mi>
</mml:munder>
<mml:mi>V</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>&#x261;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:msub>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:msup>
<mml:mo stretchy="false">)</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(12)</label>
<mml:math display="block" id="M12">
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mi>G</mml:mi>
</mml:munder>
<mml:mi>L</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>&#x261;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math display="inline" id="im46">
<mml:mtext>&#x3bb;</mml:mtext>
</mml:math>
</inline-formula> is the gradient penalty term coefficient, <inline-formula>
<mml:math display="inline" id="im47">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the sampling distribution of the gradient penalty term, the discriminator maximizes max<italic><sub>D</sub> V</italic>(<italic>G,D</italic>), and the generator minimizes min<italic><sub>G</sub> L</italic>(<italic>G,D</italic>). WGAN-GP provides a stable training method that requires little parameter tuning and avoids vanishing and exploding training gradients.</p>
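<p>For concreteness, the following minimal PyTorch sketch computes the gradient penalty of Eq. (11) and the two objectives of Eqs. (11) and (12) for one batch of one-dimensional signals. The tensor shape [batch, channels, length] and the penalty coefficient value are assumptions for illustration; this is not the exact training code of the paper.</p>
<preformat>
import torch

def gradient_penalty(D, x_real, x_fake, lam=10.0):
    """Penalty term of Eq. (11): lam * E[(||grad D(x_hat)||_2 - 1)**2],
    with x_hat sampled on straight lines between real and fake samples."""
    eps = torch.rand(x_real.size(0), 1, 1, device=x_real.device)
    x_hat = (eps * x_real + (1 - eps) * x_fake).requires_grad_(True)
    grads = torch.autograd.grad(outputs=D(x_hat).sum(), inputs=x_hat,
                                create_graph=True)[0]
    return lam * ((grads.flatten(1).norm(2, dim=1) - 1) ** 2).mean()

def d_loss(D, x_real, x_fake, lam=10.0):
    # the discriminator maximizes Eq. (11); we minimize its negative
    return -(D(x_real).mean() - D(x_fake).mean()) \
           + gradient_penalty(D, x_real, x_fake.detach(), lam)

def g_loss(D, x_fake):
    # the generator minimizes Eq. (12); E[D(x)] is constant w.r.t. G
    return -D(x_fake).mean()
</preformat>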
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Reverberation suppression generator network</title>
<p>In the previous step, the shape of processed data is <inline-formula>
<mml:math display="inline" id="im48">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, meaning that there are <inline-formula>
<mml:math display="inline" id="im49">
<mml:mi>B</mml:mi>
</mml:math>
</inline-formula> test sequences of length <italic>N</italic>; <italic>B</italic> is divided into multiple batches of size <italic>b</italic>. Our goal is to separate a mixture signal <inline-formula>
<mml:math display="inline" id="im50">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> into <italic>K</italic> source signals <inline-formula>
<mml:math display="inline" id="im51">
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mo>'</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mo>'</mml:mo>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> with <inline-formula>
<mml:math display="inline" id="im52">
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for all <inline-formula>
<mml:math display="inline" id="im53">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <italic>K</italic> is set to 1 by default in this paper, <italic>B</italic> as the batch size at training time and <inline-formula>
<mml:math display="inline" id="im54">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im55">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as the respective signal lengths. For model variants with no extra input context, we have <inline-formula>
<mml:math display="inline" id="im56">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>N</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and make predictions for the echo part of the input. Here we input the data <italic>S</italic> into the neural network and perform feature extraction on it; a sketch of the data shaping follows below. The <italic>G</italic> network is introduced in two parts: the first covers the data crop and concatenation operations, and the second describes the whole generator network.</p>
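<p>The shape handling described above can be sketched as follows; the per-sequence scaling rule and array names are illustrative assumptions, showing how <italic>B</italic> test sequences of length <italic>N</italic> might be split into batches of size <italic>b</italic> and scaled into the [-1, 1] domain of the mixture <italic>S</italic>.</p>
<preformat>
import numpy as np

def make_batches(data, b):
    """Split test data of shape [B, N] into batches of size b and scale
    each sequence into [-1, 1], matching the domain of the mixture S."""
    B, N = data.shape
    data = data / np.max(np.abs(data), axis=1, keepdims=True)
    n_batches = B // b
    return data[:n_batches * b].reshape(n_batches, b, N)

batches = make_batches(np.random.randn(96, 25600), b=8)
print(batches.shape)  # (12, 8, 25600)
</preformat>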
<sec id="s4_3_1">
<label>4.3.1</label>
<title>Data concatenation</title>
<p>Extracting sonar signal features with one-dimensional convolution is challenging, and a well-designed deep network structure is crucial for obtaining more valuable recognition features from the dataset. As the number of network layers increases, however, training deep networks becomes difficult because of the well-known degradation problem. To address this issue, optimizing how data are concatenated when passing features through the network is effective. The implementation of this approach in this paper is shown in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Data concatenation. <bold>(A)</bold> Changes in the signal data during processing. <bold>(B)</bold> The process of signal data transmission.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g004.tif"/>
</fig>
<p>In <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>, the signal feature <italic>x</italic> is passed from the previous layer and is processed by the <inline-formula>
<mml:math display="inline" id="im57">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2026;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> layers to obtain the data <inline-formula>
<mml:math display="inline" id="im58">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula>
<mml:math display="inline" id="im59">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> processes <italic>x</italic> through another branch line to obtain the result <inline-formula>
<mml:math display="inline" id="im60">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Then, the result <inline-formula>
<mml:math display="inline" id="im61">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo>&#x2295;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is obtained and subsequent processing continues; this is done to prevent the loss of original features after multiple convolutions. The convolution on the branch processes the data to obtain the final result. In <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4A</bold></xref>, the data crop operation is equivalent to <inline-formula>
<mml:math display="inline" id="im62">
<mml:mo>&#x2295;</mml:mo>
</mml:math>
</inline-formula>, which is the operation of skip connect in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4B</bold></xref>.</p>
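<p>A minimal PyTorch sketch of this crop/skip mechanism is given below. The channel count, kernel sizes, and the use of elementwise addition for the &#x2295; operation follow our reading of <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> and are assumptions, not the exact layer configuration of the paper.</p>
<preformat>
import torch
import torch.nn as nn

class SkipConcatBlock(nn.Module):
    """x passes through conv_1 ... conv_n (main path) and conv_y (branch);
    merging the two results preserves original features in deep stacks."""
    def __init__(self, ch, n_layers=3):
        super().__init__()
        self.main = nn.Sequential(*[
            nn.Conv1d(ch, ch, kernel_size=5, padding=2)
            for _ in range(n_layers)])
        self.branch = nn.Conv1d(ch, ch, kernel_size=1)   # conv_y
    def forward(self, x):
        return self.main(x) + self.branch(x)             # skip connect

x = torch.randn(8, 12, 4096)          # [batch, channels, length]
print(SkipConcatBlock(12)(x).shape)   # torch.Size([8, 12, 4096])
</preformat>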
<p>The network is built from network blocks and has a U-Net structure. The network has <inline-formula>
<mml:math display="inline" id="im63">
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> layers, and each layer is labelled <inline-formula>
<mml:math display="inline" id="im64">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math display="inline" id="im65">
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>For a stacked-layer structure, the learned feature is recorded as <inline-formula>
<mml:math display="inline" id="im66">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> when the input is <italic>x</italic>. When <inline-formula>
<mml:math display="inline" id="im67">
<mml:mi>&#x2113;</mml:mi>
</mml:math>
</inline-formula> is 0, the accumulation layer only performs an identity mapping, and the network performance is not degraded. This allows the accumulation layer to learn new features on top of the input features, resulting in better performance. A convolutional block of length <inline-formula>
<mml:math display="inline" id="im68">
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> can be expressed as:</p>
<disp-formula>
<label>(13)</label>
<mml:math display="block" id="M13">
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&lt;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>&#x2115;</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&gt;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>&#x2115;</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>&#x2113;</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>&#x2113;</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>&#x2113;</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(14)</label>
<mml:math display="block" id="M14">
<mml:mrow>
<mml:mtext>&#x2003;&#x2003;</mml:mtext>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2295;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The convolution result can be obtained by iteratively convolving the output <italic>x</italic> from <inline-formula>
<mml:math display="inline" id="im69">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> block to <inline-formula>
<mml:math display="inline" id="im70">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> block, where <inline-formula>
<mml:math display="inline" id="im71">
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> ranges from <inline-formula>
<mml:math display="inline" id="im72">
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula>
<mml:math display="inline" id="im73">
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. The input and output of the formula are represented by <inline-formula>
<mml:math display="inline" id="im74">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>&#x2113;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and each residual unit typically contains a multi-layer structure. <inline-formula>
<mml:math display="inline" id="im75">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the downsampling block function, <inline-formula>
<mml:math display="inline" id="im76">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the upsampling block function, each representing a learned residual, and <inline-formula>
<mml:math display="inline" id="im77">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>&#x2113;</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the identity mapping. The learned features from the shallow layer <inline-formula>
<mml:math display="inline" id="im78">
<mml:mi>&#x2113;</mml:mi>
</mml:math>
</inline-formula> to the deep layer <inline-formula>
<mml:math display="inline" id="im79">
<mml:mi>L</mml:mi>
</mml:math>
</inline-formula> are expressed as:</p>
<disp-formula>
<label>(15)</label>
<mml:math display="block" id="M15">
<mml:mrow>
<mml:mtext>&#x2003;&#x2003;</mml:mtext>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>L</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munderover>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The determination of <italic>L</italic> depends on the shortest distance <inline-formula>
<mml:math display="inline" id="im80">
<mml:mi>&#x2112;</mml:mi>
</mml:math>
</inline-formula> detected by the sonar, that is, on the number of data points <italic>N</italic> processed by the sonar equipment. The size of <italic>L</italic> can be obtained from the formula <inline-formula>
<mml:math display="inline" id="im81">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>l</mml:mi>
<mml:mi>o</mml:mi>
<mml:msub>
<mml:mi>&#x261;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>&#x2112;</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math display="inline" id="im82">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the sampling rate of the active sonar and <italic>c</italic> represents the speed of sound propagation in water (m/s).</p>
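<p>As a numerical illustration, the sketch below estimates the depth <italic>L</italic> compatible with a required shortest detection distance, using the relation 2<sup><italic>L</italic></sup> = &#x2112; &#xd7; <italic>Fs</italic>/<italic>c</italic> obtained by combining the minimum-input-length requirement of Section 4.3.2 with Eq. (19) there. The sampling rate and sound speed are example values of our own choosing.</p>
<preformat>
import math

def depth_for_distance(distance_m, fs_hz=250_000, c_mps=1500.0):
    """Smallest depth L with 2**L at least distance_m * Fs / c (assumed
    example values: Fs = 250 kHz, c = 1500 m/s)."""
    return math.ceil(math.log2(distance_m * fs_hz / c_mps))

print(depth_for_distance(12.3))  # 11 layers for a 12.3 m shortest range
</preformat>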
</sec>
<sec id="s4_3_2">
<label>4.3.2</label>
<title>The generator network structure of RSWGAN-GP</title>
<p>The generator side of RSWGAN-GP, called <italic>G</italic>, is constructed as a U-shaped network. It utilizes one-dimensional convolutions that operate directly on the signals, with skip connections added on top of the original structure to enhance the accuracy of signal feature extraction, as shown in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Generator network.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g005.tif"/>
</fig>
<p>As shown in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>, the signal data <italic>S</italic> is directly input into the encoder layer <inline-formula>
<mml:math display="inline" id="im83">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mn>1</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> to start the one-dimensional convolution operation. The network structure&#x2019;s transmission process and main characteristics are shown in the figure, where <italic>G</italic> is specially designed for processing sonar acoustic signals. The role of the encoder is to transform the input sequence into a low-dimensional representation that can capture the critical features of the input sequence. The decoder transforms the encoding vector into the target sequence and dynamically generates the content related to the target, as shown at <inline-formula>
<mml:math display="inline" id="im84">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. The decoder receives the feature map from the same-scale encoder layer <inline-formula>
<mml:math display="inline" id="im85">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> directly. Its data scale will not change, still <inline-formula>
<mml:math display="inline" id="im86">
<mml:mrow>
<mml:mn>96</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>25600</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. Repeated convolutions may weaken the data features as the number of convolutional layers increases, so the data crop structure is utilized to reduce the loss of information, as shown in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>.</p>
<p>In the generation part of the RSWGAN-GP network, we formulate the network output <inline-formula>
<mml:math display="inline" id="im87">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mn>1</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> as follows: let <italic>i</italic> index the down-sampling layers along the encoder, and let <italic>N</italic> denote the total number of encoder layers. The stack of feature maps, represented by <inline-formula>
<mml:math display="inline" id="im88">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, is computed as:</p>
<disp-formula>
<label>(16)</label>
<mml:math display="block" id="M16">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mi>&#x210b;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:munder>
<mml:munder>
<mml:mrow>
<mml:mi mathvariant="script">C</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>k</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="script">C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
<mml:mo>&#xfe38;</mml:mo>
</mml:munder>
<mml:mrow>
<mml:mtext>Scales</mml:mtext>
<mml:mo>:</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
<mml:munder>
<mml:munder>
<mml:mrow>
<mml:mi mathvariant="script">C</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="script">U</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>k</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>L</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>&#xfe38;</mml:mo>
</mml:munder>
<mml:mrow>
<mml:mtext>Scales</mml:mtext>
<mml:mo>:</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where the function <inline-formula>
<mml:math display="inline" id="im89">
<mml:mrow>
<mml:mi mathvariant="script">C</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes a convolution operation, <inline-formula>
<mml:math display="inline" id="im90">
<mml:mrow>
<mml:mi>&#x210b;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> realizes the feature aggregation mechanism with a convolution followed by a batch normalization and a Leaky-ReLU activation function. <inline-formula>
<mml:math display="inline" id="im91">
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im92">
<mml:mrow>
<mml:mi>&#x1d4b0;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> indicate up- and down-sampling operation respectively, and <inline-formula>
<mml:math display="inline" id="im93">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mo>,</mml:mo>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the concatenation.</p>
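<p>As an illustration, the aggregation function &#x210b;(&#xb7;) of Eq. (16), a convolution followed by batch normalization and a Leaky-ReLU activation, can be sketched in PyTorch as follows; the channel counts, kernel size, and negative slope are illustrative assumptions.</p>
<preformat>
import torch
import torch.nn as nn

def make_H(in_ch, out_ch, k=5):
    """Feature aggregation H(.) of Eq. (16): convolution, then batch
    normalization, then Leaky-ReLU activation."""
    return nn.Sequential(
        nn.Conv1d(in_ch, out_ch, kernel_size=k, padding=k // 2),
        nn.BatchNorm1d(out_ch),
        nn.LeakyReLU(0.2),
    )

# the concatenation [ , ] of same-length feature maps is along channels
a = torch.randn(8, 12, 4096)
b = torch.randn(8, 24, 4096)
fused = make_H(12 + 24, 24)(torch.cat([a, b], dim=1))
print(fused.shape)  # torch.Size([8, 24, 4096])
</preformat>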
<p>The convolution operation <inline-formula>
<mml:math display="inline" id="im94">
<mml:mi mathvariant="script">C</mml:mi>
</mml:math>
</inline-formula> of the signal is shown in the following formula:</p>
<disp-formula>
<label>(17)</label>
<mml:math display="block" id="M17">
<mml:mrow>
<mml:mtext>&#x2003;&#x2003;</mml:mtext>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mi>K</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mi>F</mml:mi>
</mml:munderover>
<mml:msubsup>
<mml:mi>&#x3c9;</mml:mi>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im95">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im96">
<mml:mi>&#x3c9;</mml:mi>
</mml:math>
</inline-formula> are inputs, outputs and weight parameters, respectively; <inline-formula>
<mml:math display="inline" id="im97">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the size of a single kernel; <italic>K</italic> is the number of kernels. Here, the kernel size of <inline-formula>
<mml:math display="inline" id="im98">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is shared across the whole input feature map, which is called weight sharing.</p>
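<p>The weight sharing of Eq. (17) can be made explicit with a direct NumPy implementation for a single kernel; this sketch ignores padding, the sum over multiple kernels, and the channel dimension, and is for illustration only.</p>
<preformat>
import numpy as np

def conv1d_single_kernel(x, w, bias=0.0):
    """Direct form of Eq. (17) for one kernel: the same 1 x F weights w
    slide over every position j of the input x (weight sharing)."""
    F = len(w)
    out = [np.dot(w, x[j:j + F]) for j in range(len(x) - F + 1)]
    return np.array(out) + bias

x = np.arange(10.0)
print(conv1d_single_kernel(x, np.array([0.25, 0.5, 0.25])))
</preformat>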
<p>It is worth mentioning that our proposed generator network is more efficient and has fewer parameters. In the decoder of the generator, the feature map depth is symmetric to that of the encoder, and thus <inline-formula>
<mml:math display="inline" id="im99">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> also has <inline-formula>
<mml:math display="inline" id="im100">
<mml:mrow>
<mml:mn>12</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mn>2</mml:mn>
<mml:mi>i</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> channels. The number of parameters in <inline-formula>
<mml:math display="inline" id="im101">
<mml:mrow>
<mml:msup>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> decoder stage of <inline-formula>
<mml:math display="inline" id="im102">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x1d4ab;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> can be computed as:</p>
<disp-formula>
<label>(18)</label>
<mml:math display="block" id="M18">
<mml:mrow>
<mml:mtable>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x1d4ab;</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>d</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im103">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the convolution kernel size, <inline-formula>
<mml:math display="inline" id="im104">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the depth of the nodes.</p>
<p>In <italic>G</italic>, the convolution with a stride of 1 maintains the output length equal to the input length. A downsampling method <inline-formula>
<mml:math display="inline" id="im105">
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is employed to increase the receptive field of the original data by <inline-formula>
<mml:math display="inline" id="im106">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The signal data <inline-formula>
<mml:math display="inline" id="im107">
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mn>1</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mn>3</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mn>4</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>m</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is downsampled to <inline-formula>
<mml:math display="inline" id="im108">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mn>1</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mn>3</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>n</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. After convolving the data to obtain its minimum scale, corresponding upsampling <inline-formula>
<mml:math display="inline" id="im109">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is used along with interpolation to restore the data to its original scale. The signal is transformed from <inline-formula>
<mml:math display="inline" id="im110">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula>
<mml:math display="inline" id="im111">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>'</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> after processing, while the signal length remains unchanged.</p>
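<p>A simplified sketch of the downsampling and upsampling operations is given below; keeping every other sample and restoring the length by interpolation follow our reading of the description above, and the specific linear interpolation scheme is an assumption.</p>
<preformat>
import numpy as np

def downsample(s):
    """D(.): keep s1, s3, s5, ..., halving the length."""
    return s[::2]

def upsample(s, n_out):
    """U(.): restore the original length by interpolation ([x2])."""
    xp = np.arange(len(s)) * 2            # positions of the kept samples
    return np.interp(np.arange(n_out), xp, s)

s = np.sin(np.linspace(0, 3 * np.pi, 4096))
restored = upsample(downsample(s), len(s))
print(s.shape, restored.shape)            # (4096,) (4096,)
</preformat>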
<p>The network is symmetric, with the first half using downsampling and the second half using upsampling. The network&#x2019;s construction affects the length of data processing and the shortest distance for processing sonar signals. When the downsampling block has <italic>L</italic> layers, the number of input points is at least <inline-formula>
<mml:math display="inline" id="im112">
<mml:mrow>
<mml:msup>
<mml:mn>2</mml:mn>
<mml:mi>L</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. An 11-layer symmetric network structure is used in the experiments, so the minimum input signal points are <inline-formula>
<mml:math display="inline" id="im113">
<mml:mrow>
<mml:msup>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mn>11</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mn>2048</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. However, if only the signal data with a length of 2048 is input, it will output only one value after 11 downsamplings, leading to less feature representation. For <inline-formula>
<mml:math display="inline" id="im114">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> to be greater than 1 in the middle of the convolution, the data signal length for training should be at least <inline-formula>
<mml:math display="inline" id="im115">
<mml:mrow>
<mml:mi>J</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>2048</mml:mn>
<mml:mo>=</mml:mo>
<mml:mn>4096</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. The shortest detection distance of convolution is <inline-formula>
<mml:math display="inline" id="im116">
<mml:mi>&#x2112;</mml:mi>
</mml:math>
</inline-formula>, and its formula is as follows:</p>
<disp-formula>
<label>(19)</label>
<mml:math display="block" id="M19">
<mml:mrow>
<mml:mtext>&#x2003;&#x2003;&#x2003;&#x2003;</mml:mtext>
<mml:mi>&#x2112;</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>Fs</italic> stands for the sampling frequency. Based on the example calculation, the shortest detection range of the active sonar after processing is 12.3&#xa0;m for the network constructed with an 11-layer downsampling block.</p>
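<p>The 12.3&#xa0;m figure can be reproduced under assumed values consistent with it (the sampling rate is not stated in this passage): with an 11-layer downsampling block, <italic>J</italic> = 2 &#xd7; 2<sup>11</sup> = 4096 points, <italic>Fs</italic> = 250&#xa0;kHz, and <italic>c</italic> = 1500&#xa0;m/s.</p>
<preformat>
# Eq. (19) under assumed example values: J = 4096, Fs = 250 kHz, c = 1500 m/s
J, Fs, c = 4096, 250_000, 1500.0
print(J / (2 * Fs) * c)  # 12.288, i.e. about 12.3 m
</preformat>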
</sec>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Reverberation suppression discriminator network</title>
<p>This section describes the discriminator (D) part of the adversarial network. The discriminator plays a crucial role in the GAN: it helps the generator produce more realistic signals after the downsample (achieving unity in the frequency domain), and it improves the GAN&#x2019;s ability to understand the training data, which lays the foundation for generating higher-quality sonar signals. At this stage, the generated signal <inline-formula>
<mml:math display="inline" id="im117">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>'</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and the required <inline-formula>
<mml:math display="inline" id="im118">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> are taken as input. To improve the accuracy of D, a one-dimensional convolutional SkipNet discriminator network suitable for underwater acoustic signals is constructed by referring to the DenseNet (<xref ref-type="bibr" rid="B14">Gao et&#xa0;al., 2020</xref>) structure. The main building block and the overall structure are introduced in the following.</p>
<p>When training the network, the complexity of the GAN brings problems such as long training times and difficulty in discovering signal features. A discriminator that uses traditional convolutions to process long underwater acoustic signals requires a deeper network design, which wastes time and may lead to feature disappearance and network degradation. Introducing SkipNet blocks reduces the required network depth and training time, and the skip connections make the architecture flexible and efficient, with good accuracy and resource efficiency.</p>
<p>In more compact convolutional networks, where rich features are less readily available because of the limited number of parameters, the different features that emerge from the activation mappings derived from data points during model inference may indicate the existence of a set of unique descriptors necessary to distinguish between different classes of objects. In contrast, data points with low feature diversity may fail to provide sufficient unique descriptors for valid predictions, leading to so-called random predictions. Random predictions can negatively affect the optimization process and impair the final performance. This paper therefore presents a series of SkipNet blocks structured to fuse previously convolved information through skip connections each time a portion of the convolution is passed on, demonstrating the importance of feature diversity.</p>
<p>As shown in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>, after downsampling, the signal features are fed both into the next layer and into the SkipNet branch for multiple convolutions, and the results of the two parts are then added. The feature signal obtained by this addition undergoes the same processing after each subsequent downsampling. This can be expressed using the following formula:</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Discriminator SkipNet.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g006.tif"/>
</fig>
<disp-formula>
<label>(20)</label>
<mml:math display="block" id="M20">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>&#x2113;</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>&#x2113;</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>&#x2113;</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22ef;</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In Eq. (20), <italic>x</italic> represents the feature data after the convolution operation. <inline-formula>
<mml:math display="inline" id="im119">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> stands for SkipBlock, which can be expressed as <inline-formula>
<mml:math display="inline" id="im120">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>D</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>N</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>v</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>B</mml:mi>
<mml:mi>N</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>|</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>D</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and <italic>H</italic> stands for the main convolution process. <inline-formula>
<mml:math display="inline" id="im121">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>&#x2113;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> adds the results above the main line and the sideline.</p>
<p>The RSWGAN-GP discriminator network is implemented as a convolutional network with SkipBlock skip connections. Because signal processing differs from image processing, feature extraction is carried out with one-dimensional convolutions, and dimension reduction is performed by downsampling rather than pooling, which is better suited to signal features <xref ref-type="bibr" rid="B29">Nakaoka et&#xa0;al. (2021)</xref>. <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref> shows the implementation of the discriminator network.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Discriminator network.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g007.tif"/>
</fig>
<p>In the discriminator network, the generator G is initialized and given a reverberation signal data vector as input. The generator maps the input vector to a generated signal. The discriminator network then judges the generated data, producing a classification probability that yields a true-or-false decision. During discriminator training, actual data is also input: the de-reverberated signal is labeled 1 (effective reverberation suppression), while the signal without effective reverberation suppression is labeled 0. The loss <italic>L<sub>D</sub></italic> is computed from the discriminator&#x2019;s output. To avoid redundancy and an excessive number of parameters in the signal processing network, the DenseNet-style structure is designed without dense connections; sparse connections are used instead, maintaining the same effect.</p>
<p>When the signal generated by the generator network meets the signal-to-reverberation ratio requirement, training stops, and the final generator serves as the underwater reverberation suppression model.</p>
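<p>The adversarial training procedure described above can be summarized in the following minimal sketch, which follows the 1/0 labeling scheme of this subsection; the optimizer handling and function names are illustrative assumptions, and the full RSWGAN-GP method replaces the cross-entropy terms with the Wasserstein loss and gradient penalty of Eq. (24) in Section 4.5.</p>
<preformat preformat-type="code">import torch
import torch.nn.functional as F

def train_step(G, D, opt_g, opt_d, z, s_real):
    """One adversarial step: z is a reverberant input signal and s_real an
    effectively de-reverberated reference signal (labeled 1)."""
    s_fake = G(z)                         # generated (suppressed) signal, labeled 0
    # discriminator step: judge real vs. generated signals
    d_real = D(s_real)
    d_fake = D(s_fake.detach())
    loss_d = (F.binary_cross_entropy(d_real, torch.ones_like(d_real))
              + F.binary_cross_entropy(d_fake, torch.zeros_like(d_fake)))
    opt_d.zero_grad(); loss_d.backward(); opt_d.step()
    # generator step: try to make D label the generated signal as 1
    d_gen = D(G(z))
    loss_g = F.binary_cross_entropy(d_gen, torch.ones_like(d_gen))
    opt_g.zero_grad(); loss_g.backward(); opt_g.step()
    return loss_d.item(), loss_g.item()
</preformat>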
</sec>
<sec id="s4_5">
<label>4.5</label>
<title>Loss functions</title>
<p>This section presents the calculation of the reverberation suppression loss for RSWGAN-GP. Due to the sinusoidal characteristics of the signal and the influence of phase differences, the original WGAN-GP loss calculation does not converge during training. Therefore, the loss calculation is preprocessed with a new method before training.</p>
<p>The main parameters of the generation network and the discrimination network used in this paper are given in <xref ref-type="table" rid="T1"><bold>Tables&#xa0;1</bold></xref>, <xref ref-type="table" rid="T2"><bold>2</bold></xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Generation network.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Num</th>
<th valign="top" align="left">Layer</th>
<th valign="top" align="left">Act./Norm.</th>
<th valign="top" align="left">Output shape</th>
<th valign="top" align="left">Num</th>
<th valign="top" align="left">Layer</th>
<th valign="top" align="left">Act./Norm.</th>
<th valign="top" align="left">Output shape</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">0</td>
<td valign="top" align="left">Signal Latent vector</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">102400&#xd7;1</td>
<td valign="middle" align="left">14</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop12 LReLU/BN</td>
<td valign="top" align="left">50&#xd7;576<break/>50&#xd7;288</td>
</tr>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">102400&#xd7;24<break/>51200&#xd7;24</td>
<td valign="top" align="left">15</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop11 LReLU/BN</td>
<td valign="top" align="left">100&#xd7;522<break/>100&#xd7;264</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">51200&#xd7;48<break/>26500&#xd7;48</td>
<td valign="top" align="left">16</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop10 LReLU/BN</td>
<td valign="top" align="left">200&#xd7;504<break/>200&#xd7;240</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">26500&#xd7;96<break/>12800&#xd7;96</td>
<td valign="top" align="left">17</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop9 LReLU/BN</td>
<td valign="top" align="left">400&#xd7;456<break/>400&#xd7;216</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">12800&#xd7;120<break/>6400&#xd7;120</td>
<td valign="top" align="left">18</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop8 LReLU/BN</td>
<td valign="top" align="left">800&#xd7;408<break/>800&#xd7;192</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">6400&#xd7;144<break/>3200&#xd7;144</td>
<td valign="top" align="left">19</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop7 LReLU/BN</td>
<td valign="top" align="left">1600&#xd7;360<break/>1600&#xd7;168</td>
</tr>
<tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">3200&#xd7;168<break/>1600&#xd7;168</td>
<td valign="top" align="left">20</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop6 LReLU/BN</td>
<td valign="top" align="left">3200&#xd7;312<break/>3200&#xd7;144</td>
</tr>
<tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">1600&#xd7;192<break/>800&#xd7;192</td>
<td valign="top" align="left">21</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop5 LReLU/BN</td>
<td valign="top" align="left">6400&#xd7;576<break/>6400&#xd7;288</td>
</tr>
<tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">800&#xd7;216<break/>400&#xd7;216</td>
<td valign="top" align="left">22</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop4 LReLU/BN</td>
<td valign="top" align="left">6400&#xd7;264<break/>6400&#xd7;120</td>
</tr>
<tr>
<td valign="top" align="left">9</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">400&#xd7;240<break/>200&#xd7;240</td>
<td valign="top" align="left">23</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop3 LReLU/BN</td>
<td valign="top" align="left">12800&#xd7;216<break/>12800&#xd7;96</td>
</tr>
<tr>
<td valign="top" align="left">10</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">200&#xd7;264<break/>100&#xd7;264</td>
<td valign="top" align="left">24</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop2 LReLU/BN</td>
<td valign="top" align="left">25600&#xd7;168<break/>25600&#xd7;72</td>
</tr>
<tr>
<td valign="top" align="left">11</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">100&#xd7;288<break/>50&#xd7;288</td>
<td valign="top" align="left">25</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop1 LReLU/BN</td>
<td valign="top" align="left">51200&#xd7;120<break/>51200&#xd7;48</td>
</tr>
<tr>
<td valign="top" align="left">12</td>
<td valign="top" align="left">Conv1d downsample</td>
<td valign="top" align="left">LReLU/BN<break/>-</td>
<td valign="top" align="left">50&#xd7;288<break/>25&#xd7;288</td>
<td valign="top" align="left">26</td>
<td valign="top" align="left">upsample Conv1d</td>
<td valign="top" align="left">Datacrop0 LReLU/BN</td>
<td valign="top" align="left">102400&#xd7;72<break/>102400&#xd7;24</td>
</tr>
<tr>
<td valign="top" align="left">13</td>
<td valign="top" align="left">Conv1d</td>
<td valign="top" align="left">LReLU/BN</td>
<td valign="top" align="left">25&#xd7;288</td>
<td valign="top" align="left">27</td>
<td valign="top" align="left">Conv1d</td>
<td valign="top" align="left">tanh</td>
<td valign="top" align="left">102400&#xd7;1</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Discrimination network.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Num</th>
<th valign="top" align="center">Layer</th>
<th valign="top" align="center">Act./Norm.</th>
<th valign="top" align="center">Output shape</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">0</td>
<td valign="top" align="center">Signal Latent vector</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">102400&#xd7;1</td>
</tr>
<tr>
<td valign="bottom" align="center">1</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN<break/>-</td>
<td valign="top" align="center">102400&#xd7;24 51200&#xd7;24</td>
</tr>
<tr>
<td valign="bottom" align="center">2</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN<break/>-</td>
<td valign="top" align="center">51200&#xd7;48<break/>26500&#xd7;48</td>
</tr>
<tr>
<td valign="bottom" align="center">3</td>
<td valign="top" align="center">Conv1d Downsamplee</td>
<td valign="top" align="center">LReLU/BN<break/>-</td>
<td valign="top" align="center">26500&#xd7;96<break/>12800&#xd7;96</td>
</tr>
<tr>
<td valign="bottom" align="center">4</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN DataAdd</td>
<td valign="top" align="center">12800&#xd7;120 6400&#xd7;120</td>
</tr>
<tr>
<td valign="bottom" align="center">5</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN DataAdd</td>
<td valign="top" align="center">6400&#xd7;144<break/>3200&#xd7;144</td>
</tr>
<tr>
<td valign="bottom" align="center">6</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN DataAdd</td>
<td valign="top" align="center">3200&#xd7;168<break/>1600&#xd7;168</td>
</tr>
<tr>
<td valign="bottom" align="center">7</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN Dataadd4</td>
<td valign="top" align="center">1600&#xd7;192 800&#xd7;192</td>
</tr>
<tr>
<td valign="bottom" align="center">8</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN DataAdd</td>
<td valign="top" align="center">800&#xd7;216<break/>400&#xd7;216</td>
</tr>
<tr>
<td valign="bottom" align="center">9</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN DataAdd</td>
<td valign="top" align="center">400&#xd7;240<break/>200&#xd7;240</td>
</tr>
<tr>
<td valign="bottom" align="center">10</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN Dataadd7</td>
<td valign="top" align="center">200&#xd7;264<break/>100&#xd7;264</td>
</tr>
<tr>
<td valign="bottom" align="center">11</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN DataAdd</td>
<td valign="top" align="center">100&#xd7;288 50&#xd7;288</td>
</tr>
<tr>
<td valign="bottom" align="center">12</td>
<td valign="top" align="center">Conv1d Downsamplee</td>
<td valign="top" align="center">LReLU/BN DataAdd</td>
<td valign="top" align="center">50&#xd7;288<break/>25&#xd7;288</td>
</tr>
<tr>
<td valign="bottom" align="center">13</td>
<td valign="top" align="center">Conv1d Downsample</td>
<td valign="top" align="center">LReLU/BN DataAdd</td>
<td valign="top" align="center">25&#xd7;1<break/>13&#xd7;1</td>
</tr>
<tr>
<td valign="top" align="center">14</td>
<td valign="top" align="center">Linear</td>
<td valign="top" align="center">sigmoid</td>
<td valign="top" align="center">1&#xd7;1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>, the signal input is 102400&#xd7;1, and the generation network maps the data back to the same 102400&#xd7;1 scale. In <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, the signal input is 102400&#xd7;1, and a classification probability is formed after the convolution and linear layers. The output scale is 1&#xd7;1, which is used as the output of the discriminator in the GAN.</p>
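<p>As a rough illustration of the discriminator shapes in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, the sketch below stacks strided one-dimensional convolutions that halve the signal length while widening the channels, ending with a linear layer and sigmoid that produce the 1&#xd7;1 classification probability. The kernel sizes are assumptions for illustration, and the DataAdd skip additions are omitted for brevity.</p>
<preformat preformat-type="code">import torch
import torch.nn as nn

def block(in_ch, out_ch):
    # Conv1d followed by a stride-2 downsampling convolution,
    # as in rows 1-13 of Table 2 (kernel sizes are assumed)
    return nn.Sequential(
        nn.Conv1d(in_ch, out_ch, kernel_size=5, padding=2),
        nn.BatchNorm1d(out_ch),
        nn.LeakyReLU(0.2),
        nn.Conv1d(out_ch, out_ch, kernel_size=3, stride=2, padding=1),
    )

# channel widths follow the Output shape column of Table 2
channels = [1, 24, 48, 96, 120, 144, 168, 192, 216, 240, 264, 288, 288]
layers = [block(c_in, c_out) for c_in, c_out in zip(channels[:-1], channels[1:])]
layers.append(block(288, 1))  # 25x288 -> 13x1, as in row 13
discriminator = nn.Sequential(*layers, nn.Flatten(), nn.Linear(13, 1), nn.Sigmoid())

x = torch.randn(1, 1, 102400)  # one 102400x1 sonar signal
print(discriminator(x).shape)  # torch.Size([1, 1])
</preformat>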
<p>This paper uses the signal-to-reverberation ratio (SRR) to evaluate the signal after reverberation suppression; the SRR indicates the degree of reverberation suppression.</p>
<disp-formula>
<label>(21)</label>
<mml:math display="block" id="M21">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>10</mml:mn>
<mml:msub>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In Eq. (21), <inline-formula>
<mml:math display="inline" id="im122">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the power of the received active sonar signal, <inline-formula>
<mml:math display="inline" id="im123">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the power of the reverberation signal, and <inline-formula>
<mml:math display="inline" id="im124">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the power of the target echo signal. The SRR indicates the degree of reverberation suppression; this computation is denoted by <inline-formula>
<mml:math display="inline" id="im125">
<mml:mrow>
<mml:mi>&#x211b;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> in the following calculations.</p>
<p>The RSWGAN-GP reverberation suppression network introduces the Wasserstein distance into the GAN-based reverberation suppression model; Eq. (22) defines this distance.</p>
<disp-formula>
<label>(22)</label>
<mml:math display="block" id="M22">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi>inf</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mstyle displaystyle="true"><mml:mo>&#x220f;</mml:mo></mml:mstyle>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>s</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>&#x211b;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>s</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x211b;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In Eq. (22), <inline-formula>
<mml:math display="inline" id="im126">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the expectation operator; <inline-formula>
<mml:math display="inline" id="im127">
<mml:mover accent="true">
<mml:mi>s</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> is the desired reference sonar signal after reverberation suppression, and <inline-formula>
<mml:math display="inline" id="im128">
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:math>
</inline-formula> is the sonar signal produced by the network&#x2019;s reverberation suppression; <inline-formula>
<mml:math display="inline" id="im129">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo>&#x220f;</mml:mo>
</mml:mstyle>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the set of joint probability distributions between the distribution of the expected signal <inline-formula>
<mml:math display="inline" id="im131">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the distribution of the generated signal <inline-formula>
<mml:math display="inline" id="im132">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula>
<mml:math display="inline" id="im133">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the infimum (greatest lower bound); <inline-formula>
<mml:math display="inline" id="im134">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>s</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>&#x211b;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>s</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x211b;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the expected distance between the signal pair under the joint probability distribution <inline-formula>
<mml:math display="inline" id="im135">
<mml:mi>&#x3b3;</mml:mi>
</mml:math>
</inline-formula>.</p>
<p>In Eq. (22), the infimum over joint distributions is difficult to compute directly, so the Wasserstein distance is converted into its Kantorovich-Rubinstein dual form. Eq. (23) shows this dual structure.</p>
<disp-formula>
<label>(23)</label>
<mml:math display="block" id="M23">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mtext mathvariant="italic">sup</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>D</mml:mi>
<mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>L</mml:mi>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>s</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>s</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In Eq. (23), <inline-formula>
<mml:math display="inline" id="im136">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the distance cost function of discriminator <italic>D</italic>; <inline-formula>
<mml:math display="inline" id="im137">
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>D</mml:mi>
<mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>L</mml:mi>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> indicates that the discriminator distance cost function satisfies the 1-Lipschitz restriction. The GP in RSWGAN-GP denotes the gradient penalty used to enforce this 1-Lipschitz restriction, and the resulting loss functions are shown in Eq. (24).</p>
<disp-formula>
<label>(24)</label>
<mml:math display="block" id="M24">
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>Z</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>Z</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>D</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mi>z</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>Z</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>s</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>G</mml:mi>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mo>|</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mo>|</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>&#x3bb;</mml:mtext>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">[</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:msub>
<mml:mi>D</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>|</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>
<inline-formula>
<mml:math display="inline" id="im138">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the generator loss function; <inline-formula>
<mml:math display="inline" id="im139">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>D</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the discriminator loss function; <inline-formula>
<mml:math display="inline" id="im140">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>Z</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the suppressed sonar signal generated by the generator; <inline-formula>
<mml:math display="inline" id="im141">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>Z</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the prior distribution of the input sonar signal <italic>Z</italic>; <inline-formula>
<mml:math display="inline" id="im142">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>P</mml:mi>
<mml:msub>
<mml:mo>|</mml:mo>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the gradient penalty term; <inline-formula>
<mml:math display="inline" id="im143">
<mml:mtext>&#x3bb;</mml:mtext>
</mml:math>
</inline-formula> is the gradient penalty coefficient; and <inline-formula>
<mml:math display="inline" id="im144">
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the <italic>p</italic>-norm.</p>
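<p>As a concrete reference, the gradient penalty and loss terms of Eq. (24) can be realized as in the standard WGAN-GP formulation, by differentiating the discriminator output at points interpolated between real and generated signals. The PyTorch sketch below is a minimal version under that assumption; the value of &#x3bb; (lambda_gp), the sign conventions, and the interpolation scheme follow the standard WGAN-GP recipe rather than values taken from this paper.</p>
<preformat preformat-type="code">import torch

def gradient_penalty(D, s_real, s_fake, lambda_gp=10.0):
    """GP term of Eq. (24): lambda * E[(||grad D(s_hat)||_p - 1)^2], with p = 2."""
    batch = s_real.size(0)
    eps = torch.rand(batch, 1, 1, device=s_real.device)  # per-sample mixing weight
    s_hat = (eps * s_real + (1.0 - eps) * s_fake).requires_grad_(True)
    d_out = D(s_hat)
    grads = torch.autograd.grad(outputs=d_out, inputs=s_hat,
                                grad_outputs=torch.ones_like(d_out),
                                create_graph=True)[0]
    grad_norm = grads.reshape(batch, -1).norm(2, dim=1)
    return lambda_gp * ((grad_norm - 1.0) ** 2).mean()

# Standard WGAN-GP losses corresponding to Eq. (24):
def d_loss(D, s_real, s_fake):
    # critic loss: E[D(G(Z))] - E[D(real)] + gradient penalty
    return D(s_fake).mean() - D(s_real).mean() + gradient_penalty(D, s_real, s_fake)

def g_loss(D, s_fake):
    # generator loss: -E[D(G(Z))]
    return -D(s_fake).mean()
</preformat>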
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Experimental verification</title>
<p>This section validates the effectiveness of the proposed model and method. For this purpose, we conducted marine experiments to verify that RSWGAN-GP can effectively suppress the reverberation of sonar signals, collecting actual data in the field for verification. RSWGAN-GP is compared with other excellent reverberation suppression methods to prove the effectiveness of the proposed approach. The following sections describe and summarize the experiments in detail.</p>
<p>In the experiment, a 30-element active sonar with a self-receiving function is used to transmit a continuous wave (CW) signal with a <inline-formula>
<mml:math display="inline" id="im145">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>250</mml:mn>
<mml:mi>k</mml:mi>
<mml:mi>H</mml:mi>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> sampling rate and a <inline-formula>
<mml:math display="inline" id="im146">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>30</mml:mn>
<mml:mi>k</mml:mi>
<mml:mi>H</mml:mi>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> frequency. The training data set contains 6000 samples in total, of which 2520 are actual sonar data samples obtained through experiments and 3480 are generated by the underwater active sonar reverberation simulation method. The simulated and actual data are randomly shuffled, and the training and validation sets are split in a 5:1 ratio. The actual data are used to verify the reverberation suppression effect of the final trained model. The experiments were conducted in one of the bays in Qingdao. <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref> shows the experimental scenario.</p>
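<p>A minimal sketch of the dataset assembly described above is given below: the 2520 measured samples and 3480 simulated samples are shuffled together and split 5:1 into training and validation sets. The array names, placeholder contents, and fixed seed are illustrative assumptions.</p>
<preformat preformat-type="code">import numpy as np

rng = np.random.default_rng(0)                  # illustrative fixed seed
measured = rng.standard_normal((2520, 1024))    # placeholder; real signals are 102400 long
simulated = rng.standard_normal((3480, 1024))   # placeholder for the simulated samples

data = np.concatenate([measured, simulated], axis=0)
rng.shuffle(data)                               # randomly interleave the two sources

n_train = len(data) * 5 // 6                    # 5:1 train/validation split
train_set, val_set = data[:n_train], data[n_train:]
print(train_set.shape, val_set.shape)           # (5000, 1024) (1000, 1024)
</preformat>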
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Experimental environment and equipment. <bold>(A)</bold> Sonar and target underwater deployment status. <bold>(B)</bold> Sonar and target experiment terrain environment and experiment method. <bold>(C)</bold> Active sonar equipment during the experiment. <bold>(D)</bold> Target equipment during the experiment.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g008.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8A</bold></xref> is a schematic diagram of the relative position of the active sonar and the target during the test. In the figure, <inline-formula>
<mml:math display="inline" id="im147">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>200</mml:mn>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the shortest relative distance between the sonar and the target, and <inline-formula>
<mml:math display="inline" id="im148">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>300</mml:mn>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the longest relative distance; the active sonar and the target are deployed at the same depth below sea level, <inline-formula>
<mml:math display="inline" id="im149">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>5</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>10</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> m. As shown in <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8B</bold></xref>, conducting the experiment in the harbor minimizes the error caused by the ship&#x2019;s swing under wind and waves. The narrowest point of the harbor exit is 200 m, and the target ship tows the target at a distance of 200-300 m from the active sonar. <xref ref-type="fig" rid="f8"><bold>Figures&#xa0;8C, D</bold></xref> show the experimental active sonar equipment and the detected target, respectively. The detected target is a cylindrical object with an internal cavity, 533 mm in diameter and 3 m in length. The experimental data used to validate the method were obtained in these sea trials.</p>
<p>The collected data are used as the validation set to validate each epoch during training. The active sonar signal is subjected to reverberation suppression, and the signal change during training is shown in <xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref>.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Active sonar reverberation suppression changing graph.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g009.tif"/>
</fig>
<p>In <xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref>, the time domain diagram shows that the model is trained by mixing simulation data with actual acquisition data, and the trained model is used to process the experimental results of the active sonar signal data of the ocean experiment. The figure shows the results of sonar signal processing of model pairs produced by different iterations. In the Epoch 1-50 iteration training process, the processed sonar signal still has the phenomenon of signal chipping. Still, it shows the state of strong reverberation, and the target position cannot be visually observed from the time domain. Starting from Epoch 60-100, the chipping phenomenon of the echo signal disappears, the correct sinusoidal signal can be restored, and the neural network can already find the desired target feature state. From Epoch 110 onwards, the target can be precisely located, and in subsequent training iterations, the target echo feature can be highlighted while suppressing the strong reverberation state during sonar propagation. When the model training iteration is above Epoch 210, the target echo signal can be observed macroscopically from the time domain diagram. Reverberation changes in the middle state of the network as shown below:</p>
<p>The feature map at the network bottleneck, where <inline-formula>
<mml:math display="inline" id="im150">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, is a 288&#xd7;25 feature array. This array is where the network forms its features, which are then upsampled to recover the signal. In <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref>, some features of the signal strengthen and others weaken as the epochs increase, representing the anti-reverberation operations. <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10A</bold></xref> shows that the convolution features of the signal are initially dispersed across the array. After iteration, some features weaken, as shown in <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10B</bold></xref>, reflected in the signal as the reverberation part beginning to weaken; these features then disappear, as shown in <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10C</bold></xref>. Finally, the target echo features are enhanced, as shown in <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10D</bold></xref>, which corresponds to the enhancement of the echo signal detected by the sonar.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>The intermediate state of the network changes. <bold>(A)</bold> Initial network intermediate layer processing result. <bold>(B)</bold> After 50 iterations. <bold>(C)</bold> After 100 iterations. <bold>(D)</bold> After 150 iterations.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g010.tif"/>
</fig>
<p>During the training process, convergence is achieved by continuously correcting errors. The loss curve changes during the training process of RSWGAN-GP, including the generator loss curve and the discriminator loss curve, as shown in <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref>.</p>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>Results for different parameters. <bold>(A)</bold> The discriminator loss varies with epoch. <bold>(B)</bold> The generator loss varies with epoch.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g011.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref> shows the training loss curves of RSWGAN-GP under various weight decay and learning rate(lr) settings. <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10A</bold></xref> is the line chart of the generator loss variation, and <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10B</bold></xref> is the curve chart of the discriminator loss variation. By setting different parameter values, rapid convergence of the network can be achieved when weight decay and learning rate are set to 0.00001 and 0.0001, respectively. As shown in the figure, in the WGAN-GP network, the generator loss continuously approaches 0, and the discriminator loss continuously approaches 0. The loss of the generator shows a rising trend, and the loss of the discriminator shows a decreasing trend. During training, the generator and discriminator are in a state of mutual competition, and their loss values should fluctuate up and down. The trend shown in the figure represents the main direction.</p>
<p>For applications where target detection will be performed after signal processing, the difference between before and after signal processing is shown in <xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref>.</p>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>Matching field result.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g012.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref> shows the target position detection map after nonlinear processors in matching fields on CW signal processing <xref ref-type="bibr" rid="B37">Sun and Li (2019)</xref>, where the target indicates the target&#x2019;0s location, which is about 204 m&#x2014;the comparison between the original data and the data after RSWGAN-GP processing is shown. In the original data, before the target position, there is the influence of reverberation on its judgment. When making a prediction, the highest correlation position changes and the highest point in front is judged as the target position. When comparing the red line with the blue line, it is evident that the signal processed by RSWGAN-GP can reduce the influence of reverberation when making target judgments, increasing the success rate of target judgments. Here, four contrasting points are selected, as shown in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Target detection correlation comparison.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Num</th>
<th valign="top" align="center">Before treatment</th>
<th valign="top" align="center">After treatment</th>
<th valign="top" align="center">Difference</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.417</td>
<td valign="top" align="center">0.205</td>
<td valign="top" align="center">-0.212</td>
</tr>
<tr>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.419</td>
<td valign="top" align="center">-0.581</td>
</tr>
<tr>
<td valign="top" align="center">3</td>
<td valign="top" align="center">0.553</td>
<td valign="top" align="center">0.32</td>
<td valign="top" align="center">-0.233</td>
</tr>
<tr>
<td valign="top" align="center">4</td>
<td valign="top" align="center">0.962</td>
<td valign="top" align="center">0.984</td>
<td valign="top" align="center">0.022</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Among the data points, the first three are reverberation points and the fourth is the target point. The comparison shows that the reverberation is well suppressed, and the SRR is improved after echo suppression. The data comparison proves that the well-trained model suppresses reverberation well, increasing the target correlation by 0.022 and decreasing the reverberation correlation by about 0.34 on average.</p>
<p>Many excellent algorithms exist for underwater acoustic reverberation suppression; here, the collected data are used to compare several of them with the method proposed in this paper. The comparison results after processing the sonar signal are shown in <xref ref-type="fig" rid="f13"><bold>Figure&#xa0;13</bold></xref>.</p>
<fig id="f13" position="float">
<label>Figure&#xa0;13</label>
<caption>
<p>Sonar signal comparison of reverberation suppression methods. <bold>(A)</bold> Original signal diagram and signal diagram processed by 2D-AR Prewhitening, LMS, Frft, PCI-SVM, RSWGAN-GP methods. <bold>(B)</bold> SRR changes with SINR after model reverberation suppression.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g013.tif"/>
</fig>
<p>In <xref ref-type="fig" rid="f13"><bold>Figure&#xa0;13A</bold></xref>, the Original signal is unprocessed, the reverberation power is 0.398, the signal echo power is 0.432, and its SRR is 0.361dB, the highlighted red part indicates the position of the target echo, which cannot be effectively identified from the figure. 2D-AR PreWhitener is used to eliminate correlation and frequency correlation in the signal <xref ref-type="bibr" rid="B25">Li et&#xa0;al. (2008)</xref> so that the signal is flatter in the frequency domain, and the processed echo sonar signal shows the position of the target echo. The least mean square filter (LMS) processes the echo signal <xref ref-type="bibr" rid="B23">Kim et&#xa0;al. (2000)</xref>, and the reverberation component is suppressed, decreasing amplitude. Adaptive fractional Fourier transforms (FrFt) for suppressing reverberation <xref ref-type="bibr" rid="B50">Yu et&#xa0;al. (2022)</xref>, Although the reverberation is partially suppressed, the actual impact is not apparent. The PCI-SVM reverberation suppression method combines Principal Component Inversion (PCI) and Support Vector Machine (SVM) techniques <xref ref-type="bibr" rid="B40">Wang et&#xa0;al. (2021)</xref>. This method selects suitable and effective feature values through SVM to extract the main features for reverberation suppression. The figure shows that this method performs better than the previous ones. We present the impact of the RSWGAN-GP method proposed in this paper. Compared to the excellent methods, the reverberation component is effectively suppressed, and the echo component is more visible and prominent. The detailed data comparison is presented in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Experimental data comparison table.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center"/>
<th valign="top" align="center">Reverberation power</th>
<th valign="top" align="center">Echo Power</th>
<th valign="top" align="center">SRR(dB)</th>
<th valign="top" align="center">Improve SRR(dB)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Original Data</td>
<td valign="top" align="center">0.398</td>
<td valign="top" align="center">0.432</td>
<td valign="top" align="center">0.361</td>
<td valign="top" align="center">0</td>
</tr>
<tr>
<td valign="top" align="center">2D-AR PreWhitener</td>
<td valign="top" align="center">0.096</td>
<td valign="top" align="center">0.087</td>
<td valign="top" align="center">0.35</td>
<td valign="top" align="center">-0.011</td>
</tr>
<tr>
<td valign="top" align="center">LMS</td>
<td valign="top" align="center">0.036</td>
<td valign="top" align="center">0.051</td>
<td valign="top" align="center">1.525</td>
<td valign="top" align="center">1.164</td>
</tr>
<tr>
<td valign="top" align="center">FrFt</td>
<td valign="top" align="center">0.086</td>
<td valign="top" align="center">0.094</td>
<td valign="top" align="center">0.414</td>
<td valign="top" align="center">0.053</td>
</tr>
<tr>
<td valign="top" align="center">PCI-SVM</td>
<td valign="top" align="center">0.0105</td>
<td valign="top" align="center">0.0562</td>
<td valign="top" align="center">7.2925</td>
<td valign="top" align="center">6.932</td>
</tr>
<tr>
<td valign="top" align="center">RSWGAN-GP</td>
<td valign="top" align="center">0.01</td>
<td valign="top" align="center">0.1265</td>
<td valign="top" align="center">11.021</td>
<td valign="top" align="center">10.659</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="fig" rid="f13"><bold>Figure&#xa0;13B</bold></xref> compares SRR results processed by different methods under different SINR environments. In actual use, the reverberation signal processing SINR is between 0 dB and 10 dB. In order to reflect the processing ability of a high reverberation environment, the signal is mainly concentrated between -10 dB and 10 dB (data comparison in the middle of the two blue colors in the figure). As can be seen from the figure, RSWGAN-GP and PCI-SVM methods are significantly superior to other methods. At the same time, RSWGAN-GP is 3 dB higher than the PCI-SVM method at SINR -10 dB, and the advantages become more evident as SINR increases. The advantage of RSWGAN-GP is that it can learn many high reverberation data to improve the processing ability of high-reverberation data. In <xref ref-type="fig" rid="f13"><bold>Figures&#xa0;13A, B</bold></xref>, we compare different reverberation suppression methods to demonstrate the effectiveness of RSWGAN-GP in suppressing reverberation.</p>
<p>
<xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14</bold></xref> shows the processing results and time-frequency distribution of raw signal, AR pre-whitening, LMS, FrFt, PCI-SVM and RSWGAN-GP, respectively. Experimental results show that the algorithm can effectively suppress reverberation and extract target echo components under high SRR conditions. At the same time, the time-frequency structure of the target highlight echo remains unchanged. The processing results of the original signal distribution are shown in <xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14A</bold></xref>. It can be seen that there is some substantial reverberation interference near the target echo, but its energy is weaker than the target echo. The signal processed by 2D-AR PreWhitener, LMS, FrFt and PCI-SVM is shown in <xref ref-type="fig" rid="f14"><bold>Figures 14B&#x2013;E</bold></xref>, and the target echo energy is still not obvious. As shown in <xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14F</bold></xref>, the RSWAGN-GP processing results show that the overall reverberation background has been effectively removed, and only a tiny part of weak background interference remains near the target echo.</p>
<fig id="f14" position="float">
<label>Figure&#xa0;14</label>
<caption>
<p>Time-frequency distributions. <bold>(A)</bold> Signal original state .<bold>(B)</bold> After 2D-AR PreWhitener processing. <bold>(C)</bold> After LMS processing. <bold>(D)</bold> After FrFt processing. <bold>(E)</bold> After PCI-SVM processing. <bold>(F)</bold> After RSWGAN-GP processing.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-10-1280305-g014.tif"/>
</fig>
<p>The superiority of the proposed method can be seen in <xref ref-type="fig" rid="f12"><bold>Figures&#xa0;12</bold></xref>, <xref ref-type="fig" rid="f13"><bold>13</bold></xref>; the detailed parameters, including reverberation power, echo power, SRR, and SRR improvement, are listed for comparison in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>.</p>
<p>The findings in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref> demonstrate that the RSWGAN-GP method proposed in this study is the most effective technique for attenuating reverberation among the methods evaluated, raising the SRR from 0.361 dB to 11.021 dB, an improvement of 10.659 dB. The PCI-SVM algorithm comes in second, with the optimal rank of 42 determined by the SVM classification experiment, improving the SRR by about 6.9 dB. The LMS and FrFt techniques improve the SRR by 1.164 dB and 0.053 dB, respectively, while the 2D-AR PreWhitener slightly lowers it by 0.011 dB. Their ultimate effects are thus less substantial than those of RSWGAN-GP and PCI-SVM.</p>
<p>The effectiveness of the proposed RSWGAN-GP method for suppressing reverberation in sonar signals is demonstrated through these experiments. Training on experimental data collected at sea shows that the relevant features are extracted and amplified over the training iterations. The comparison of the algorithms makes it evident that the method significantly improves signal quality: the SRR improvement reaches 15.169 dB, demonstrating the method&#x2019;s effectiveness and superiority for reverberation suppression.</p>
</sec>
<sec id="s6" sec-type="conclusion">
<label>6</label>
<title>Conclusion</title>
<p>Reverberation suppression of echo signals is a crucial issue in active sonar systems. This paper presents a novel method, RSWGAN-GP, that suppresses reverberation in sonar signals using a generative adversarial network. The reverberation suppression network employs a one-dimensional convolutional structure to process the signal. Drawing on U-Net and DenseNet, RSWGAN-GP uses skip connections and densely connected blocks to suppress sonar signal reverberation efficiently. Comparisons on actually collected data demonstrate the effectiveness of the proposed method, which effectively suppresses the active sonar reverberation signal and improves the SRR by approximately 10 dB, outperforming the other methods.</p>
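<p>For completeness, the gradient-penalty term that gives RSWGAN-GP its name is sketched below. This is the generic WGAN-GP penalty for one-dimensional signal batches, paired with a toy critic; it is not the network trained in this work, whose architecture is described in the preceding sections.</p>
<preformat>
# Hedged sketch: WGAN-GP gradient penalty for 1-D signal batches of shape
# (batch, channels, length); hyper-parameters are placeholders.
import torch

def gradient_penalty(critic, real, fake, lambda_gp=10.0):
    eps = torch.rand(real.size(0), 1, 1, device=real.device)
    mixed = (eps * real + (1.0 - eps) * fake).requires_grad_(True)
    scores = critic(mixed)
    grads = torch.autograd.grad(
        outputs=scores, inputs=mixed,
        grad_outputs=torch.ones_like(scores),
        create_graph=True, retain_graph=True,
    )[0]
    grad_norm = grads.flatten(start_dim=1).norm(2, dim=1)
    # Penalize deviation of the critic's gradient norm from 1 (Lipschitz-1),
    # which replaces the weight clipping of the original WGAN.
    return lambda_gp * ((grad_norm - 1.0) ** 2).mean()

critic = torch.nn.Sequential(            # toy stand-in for the real critic
    torch.nn.Conv1d(1, 8, 9, stride=2, padding=4),
    torch.nn.LeakyReLU(0.2),
    torch.nn.Flatten(),
    torch.nn.LazyLinear(1),
)
real, fake = torch.randn(4, 1, 256), torch.randn(4, 1, 256)
print(gradient_penalty(critic, real, fake))
</preformat>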
<p>Several experiments revealed that the processing model trained by RSWGAN-GP places specific requirements on the transmitted pulse width. The transmitted pulse width needs to be adjusted for different detection distances, yet the trained model is largely insensitive to the pulse width of the echo signals it processes. However, this relationship may be specific to the dataset used. In future research, efforts will be made to improve the model&#x2019;s generalization capabilities so that signals with different pulse widths can be processed efficiently.</p>
</sec>
<sec id="s7" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>ZW: Writing &#x2013; original draft. HZ: Conceptualization, Supervision, Writing &#x2013; review &amp; editing. WH: Conceptualization, Formal Analysis, Methodology, Resources, Supervision, Writing &#x2013; review &amp; editing. XC: Methodology, Supervision, Writing &#x2013; review &amp; editing. NT: Data curation, Supervision, Writing &#x2013; review &amp; editing. YA: Data curation, Methodology, Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s9" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was financially supported by the National Natural Science Foundation of China (Grant Nos. 91938204, 41527901, 61701462 and 62271459), the Marine S&amp;T Fund of Shandong Province for Pilot National Laboratory for Marine Science and Technology (Qingdao) (No. 2018SDKJ0210), the National Defense Science and Technology Innovation Special Zone Project: Marine Science and Technology Collaborative Innovation Center (22-05-CXZX-04-01-02), and the Open Studio for Marine High Frequency Communications.</p>
</sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abrahamyan</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ziatchin</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Deligiannis</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Bias loss for mobile neural networks</article-title>. <source>Proceedings of the IEEE/CVF International Conference on Computer Vision</source> <volume>1</volume>, <fpage>6556</fpage>&#x2013;<lpage>6566</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ICCV48922.2021.00649</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashraf</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Jeong</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>C. H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Underwater ambient-noise removing gan based on magnitude and phase spectra</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>24513</fpage>&#x2013;<lpage>24530</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3051263</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bharathi</surname> <given-names>B. M. R.</given-names>
</name>
<name>
<surname>Mohanty</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Time delay estimation in reverberant and low snr environment by emd based maximum likelihood method</article-title>. <source>Measurement</source> <volume>137</volume>, <fpage>655</fpage>&#x2013;<lpage>663</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.measurement.2019.01.096</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chalapathy</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Menon</surname> <given-names>A. K.</given-names>
</name>
<name>
<surname>Chawla</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Robust, deep and inductive anomaly detection</article-title>,&#x201d; in <conf-name>Machine Learning and Knowledge Discovery in Databases: European Conference, ECML PKDD 2017</conf-name> (<publisher-loc>Skopje, Macedonia</publisher-loc>: <publisher-name>Springer</publisher-name>), <conf-date>September 18&#x2013;22, 2017</conf-date>. <fpage>36</fpage>&#x2013;<lpage>51</lpage>, <italic>Proceedings, Part I</italic>.</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chandrasekaran</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Sanghavi</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Parrilo</surname> <given-names>P. A.</given-names>
</name>
<name>
<surname>Willsky</surname> <given-names>A. S.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Rank-sparsity incoherence for matrix decomposition</article-title>. <source>SIAM J. Optimization</source> <volume>21</volume>, <fpage>572</fpage>&#x2013;<lpage>596</lpage>. doi: <pub-id pub-id-type="doi">10.1137/090761793</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>J.</given-names>
</name>
<name>
<surname>An</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Time&#x2013;frequency mask-aware bidirectional lstm: A deep learning approach for underwater acoustic signal separation</article-title>. <source>Sensors</source> <volume>22</volume>, <fpage>5598</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s22155598</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Fang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>An</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Method of spatially correlated wideband ambient noise simulation for underwater acoustic array</article-title>,&#x201d; in <conf-name>INTER-NOISE and NOISE-CON Congress and Conference Proceedings</conf-name> (<publisher-loc>Chicago, IL</publisher-loc>: <publisher-name>Institute of Noise Control Engineering</publisher-name>), Vol. <volume>258</volume>. <fpage>3160</fpage>&#x2013;<lpage>3169</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Costas</surname> <given-names>J. P.</given-names>
</name>
</person-group> (<year>1984</year>). <article-title>A study of a class of detection waveforms having nearly ideal range&#x2014;doppler ambiguity properties</article-title>. <source>Proc. IEEE</source> <volume>72</volume>, <fpage>996</fpage>&#x2013;<lpage>1009</lpage>. doi: <pub-id pub-id-type="doi">10.1109/PROC.1984.12967</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cox</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Lai</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>Geometric comb waveforms for reverberation suppression</article-title>. <source>Proc. 1994 28th Asilomar Conf. Signals Syst. Comput. (IEEE)</source> <volume>2</volume>, <fpage>1185</fpage>&#x2013;<lpage>1189</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACSSC.1994.471646</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Diniz</surname> <given-names>P. S.</given-names>
</name>
<name>
<surname>Da Silva</surname> <given-names>E. A.</given-names>
</name>
<name>
<surname>Netto</surname> <given-names>S. L.</given-names>
</name>
</person-group> (<year>2010</year>). <source>Digital signal processing: system analysis and design</source> (<publisher-loc>UK</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>).</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname> <given-names>H.-W.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y.-H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Convolutional generative adversarial networks with binary neurons for polyphonic music generation</article-title>. <source>arXiv preprint arXiv:1804.09399</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1804.09399</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Faure</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>1964</year>). <article-title>Theoretical model of reverberation noise</article-title>. <source>J. Acoustical Soc. America</source> <volume>36</volume>, <fpage>259</fpage>&#x2013;<lpage>266</lpage>. doi: <pub-id pub-id-type="doi">10.1121/1.1918943</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Freburger</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Tufts</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>1997</year>). &#x201c;<article-title>Rapidly adaptive signal detection using the principal component inverse (pci) method</article-title>,&#x201d; in <conf-name>Conference Record of the Thirty-First Asilomar Conference on Signals, Systems and Computers (Cat. No. 97CB36136)</conf-name> (<publisher-loc>Pacific Grove, CA</publisher-loc>: <publisher-name>IEEE</publisher-name>), Vol. <volume>1</volume>. <fpage>765</fpage>&#x2013;<lpage>769</lpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>He</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Recognition method for underwater acoustic target based on dcgan and densenet</article-title>,&#x201d; in <conf-name>2020 IEEE 5th International Conference on Image, Vision and Computing (ICIVC)</conf-name> (<publisher-loc>Beijing, China</publisher-loc>: <publisher-name>IEEE</publisher-name>). <fpage>215</fpage>&#x2013;<lpage>221</lpage>.</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Woods</surname> <given-names>W. S.</given-names>
</name>
<name>
<surname>Merks</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Learning spectral mapping for speech dereverberation and denoising</article-title>. <source>IEEE/ACM Trans. Audio Speech Lang. Process.</source> <volume>23</volume>, <fpage>982</fpage>&#x2013;<lpage>992</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TASLP.2015.2416653</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>He</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Hao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Underwater reverberation suppression via attention and cepstrum analysis-guided network</article-title>. <source>J. Mar. Sci. Eng.</source> <volume>11</volume>, <fpage>313</fpage>. doi: <pub-id pub-id-type="doi">10.3390/jmse11020313</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hodges</surname> <given-names>R. P.</given-names>
</name>
</person-group> (<year>2011</year>). <source>Underwater acoustics: Analysis, design and performance of sonar</source> (<publisher-loc>United Kingdom</publisher-loc>: <publisher-name>John Wiley &amp; Sons</publisher-name>).</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Rao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>A shallow seafloor reverberation simulation method based on generative adversarial networks</article-title>. <source>Appl. Sci.</source> <volume>13</volume>, <fpage>595</fpage>. doi: <pub-id pub-id-type="doi">10.3390/app13010595</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Research on analyzing and processing methods of ocean sonar signals</article-title>. <source>J. Coast. Res.</source> <volume>94</volume>, <fpage>208</fpage>&#x2013;<lpage>212</lpage>. doi: <pub-id pub-id-type="doi">10.2112/SI94-044.1</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Innami</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kasai</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Nmf-based environmental sound source separation using time-variant gain features</article-title>. <source>Comput. Mathematics Appl.</source> <volume>64</volume>, <fpage>1333</fpage>&#x2013;<lpage>1342</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.camwa.2012.03.077</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>K.-Y.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>Z.-M.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>Z.-B.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Forward and backward prediction model based signal detection in sea bottom reverberation</article-title>. <source>Acta Electronica Sin.</source> <volume>35</volume>, <fpage>1766</fpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kay</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Salisbury</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>1990</year>). <article-title>Improved active sonar detection using autoregressive prewhiteners</article-title>. <source>J. Acoustical Soc. America</source> <volume>87</volume>, <fpage>1603</fpage>&#x2013;<lpage>1611</lpage>. doi: <pub-id pub-id-type="doi">10.1121/1.399408</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname> <given-names>K. M.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Youn</surname> <given-names>D. H.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Adaptive processing technique for enhanced cfar detecting performance in active sonar systems</article-title>. <source>IEEE Trans. Aerospace Electronic Syst.</source> <volume>36</volume>, <fpage>693</fpage>&#x2013;<lpage>700</lpage>. doi: <pub-id pub-id-type="doi">10.1109/7.845261</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Koh</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chia</surname> <given-names>C. S.</given-names>
</name>
<name>
<surname>Tan</surname> <given-names>B. A.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Underwater signal denoising using deep learning approach</article-title>,&#x201d; in <source>Global Oceans 2020: SINGAPORE&#x2013;US Gulf Coast</source> (<publisher-loc>Biloxi, MS, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Hou</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Detection in reverberation using space time adaptive prewhiteners</article-title>. <source>J. acoustical Soc. America</source> <volume>124</volume>, <fpage>EL236</fpage>&#x2013;<lpage>EL242</lpage>. doi: <pub-id pub-id-type="doi">10.1121/1.2963187</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Mu</surname> <given-names>W.</given-names>
</name>
</person-group>. (<year>2015</year>). <article-title>Blind separation of underwater target echoes in reverberation background</article-title>. <source>Journal of Harbin Engineering University</source> <volume>36</volume>, <fpage>62</fpage>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Gulliver</surname> <given-names>T. A.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An underwater bistatic positioning system based on an acoustic vector sensor and experimental investigation</article-title>. <source>Appl. Acoustics</source> <volume>171</volume>, <fpage>107558</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.apacoust.2020.107558</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mejjaoli</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Omri</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Spectral theorems associated with the directional short-time fourier transform</article-title>. <source>J. Pseudo-Differential Operators Appl.</source> <volume>11</volume>, <fpage>15</fpage>&#x2013;<lpage>54</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11868-019-00308-z</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Nakaoka</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Inoue</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Makino</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Teacher-student learning for low-latency online speech enhancement using wave-u-net</article-title>,&#x201d; in <conf-name>ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>, (<publisher-loc>Toronto, ON, Canada</publisher-loc>: <publisher-name>IEEE</publisher-name>). <fpage>661</fpage>&#x2013;<lpage>665</lpage>.</citation>
</ref>
<ref id="B30">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Neasham</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Menon</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Hinton</surname> <given-names>O. R.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>Broadband, ultra-sparse array processing for low complexity multibeam sonar imaging</article-title>,&#x201d; in <source>OCEANS 2007-Europe</source> (<publisher-loc>Aberdeen, UK</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ozaktas</surname> <given-names>H. M.</given-names>
</name>
<name>
<surname>Arikan</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Kutay</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Bozdagt</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>Digital computation of the fractional fourier transform</article-title>. <source>IEEE Trans. Signal Process.</source> <volume>44</volume>, <fpage>2141</fpage>&#x2013;<lpage>2150</lpage>. doi: <pub-id pub-id-type="doi">10.1109/78.536672</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qian</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Adaptive algorithms for low-rank and sparse matrix recovery with truncated nuclear norm</article-title>. <source>Int. J. Mach. Learn. Cybernetics</source> <volume>10</volume>, <fpage>1341</fpage>&#x2013;<lpage>1355</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s13042-018-0814-9</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ratnarajah</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ananthabhotla</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Ithapu</surname> <given-names>V. K.</given-names>
</name>
<name>
<surname>Hoffmann</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Manocha</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Calamia</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Towards improved room impulse response estimation for speech recognition</article-title>,&#x201d; in <conf-name>ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>. (<publisher-loc>Rhodes Island, Greece</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>5</lpage>.</citation>
</ref>
<ref id="B34">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ronneberger</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Fischer</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Brox</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>U-net: Convolutional networks for biomedical image segmentation</article-title>,&#x201d; in <conf-name>Medical Image Computing and Computer-Assisted Intervention&#x2013;MICCAI 2015: 18th International Conference</conf-name>, <conf-loc>Munich, Germany</conf-loc> (<publisher-name>Springer</publisher-name>), <conf-date>October 5&#x2013;9, 2015</conf-date>. <fpage>234</fpage>&#x2013;<lpage>241</lpage>, <italic>Proceedings, Part III</italic>.</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Singer</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Nelson</surname> <given-names>J. K.</given-names>
</name>
<name>
<surname>Kozat</surname> <given-names>S. S.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Signal processing for underwater acoustic communications</article-title>. <source>IEEE Commun. Magazine</source> <volume>47</volume>, <fpage>90</fpage>&#x2013;<lpage>96</lpage>. doi: <pub-id pub-id-type="doi">10.1109/MCOM.2009.4752683</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>He</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Real-time object detection for auvs using self-cascaded convolutional neural networks</article-title>. <source>IEEE J. Oceanic Eng.</source> <volume>46</volume>, <fpage>56</fpage>&#x2013;<lpage>67</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JOE.2019.2950974</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Robust adaptive beamforming method for active sonar in single snapshot</article-title>. <source>MATEC Web Conferences (EDP Sciences)</source> <volume>283</volume>, <fpage>03006</fpage>. doi: <pub-id pub-id-type="doi">10.1051/matecconf/201928303006</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ning</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Research on the statistical modeling and simulation for interface reverberation</article-title>. <source>2010 3rd IEEE International Conference on Computer Science and Information Technology</source> <volume>9</volume>, <fpage>566</fpage>&#x2013;<lpage>570</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ICCSIT.2010.5563616</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>A maximum likelihood approach to deep neural network based speech dereverberation</article-title>,&#x201d; in <conf-name>2017 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)</conf-name>. (<publisher-loc>Kuala Lumpur, Malaysia</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>155</fpage>&#x2013;<lpage>158</lpage>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Study on an anti-reverberation method based on pci-svm</article-title>. <source>Appl. Acoustics</source> <volume>182</volume>, <fpage>108189</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.apacoust.2021.108189</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ward</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2001</year>). &#x201c;<article-title>The use of sinusoidal frequency modulated pulses for low-doppler detection</article-title>,&#x201d; in <conf-name>MTS/IEEE Oceans 2001. An Ocean Odyssey. Conference Proceedings (IEEE Cat. No. 01CH37295)</conf-name> (<publisher-loc>Honolulu, HI, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), Vol. <volume>4</volume>. <fpage>2147</fpage>&#x2013;<lpage>2151</lpage>.</citation>
</ref>
<ref id="B42">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Weiss</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Singer</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Wornell</surname> <given-names>G. W.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Towards robust data-driven underwater acoustic localization: A deep cnn solution with performance guarantees for model mismatch</article-title>,&#x201d; in <conf-name>ICASSP 2023- 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>. (<publisher-loc>Rhodes Island, Greece</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>5</lpage>.</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Widrow</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Mantey</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Griffiths</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Goode</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>1967</year>). <article-title>Adaptive antenna systems</article-title>. <source>Proc. IEEE</source> <volume>55</volume>, <fpage>2143</fpage>&#x2013;<lpage>2159</lpage>. doi: <pub-id pub-id-type="doi">10.1109/PROC.1967.6092</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Cen</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Svr-based approach to improve active sonar detection in reverberation</article-title>,&#x201d; in <conf-name>2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)</conf-name>. (<publisher-loc>Hong Kong, China</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>563</fpage>&#x2013;<lpage>568</lpage>.</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Extraction and classification of acoustic scattering from underwater target based on wigner-ville distribution</article-title>. <source>Appl. Acoustics</source> <volume>138</volume>, <fpage>52</fpage>&#x2013;<lpage>59</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.apacoust.2018.03.026</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wuth</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Stern</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Yoma</surname> <given-names>N. B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Non causal deep learning based dereverberation</article-title>. <source>arXiv preprint arXiv:2009.02832</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2009.02832</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Gerstoft</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Underwater acoustic target recognition using attention-based deep neural network</article-title>. <source>JASA Express Lett.</source> <volume>1</volume>, <fpage>106001</fpage>. doi: <pub-id pub-id-type="doi">10.1121/10.0006299</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yin</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Sonar image target detection based on deep learning</article-title>,&#x201d; in <conf-name>2023 International Conference on Distributed Computing and Electrical Circuits and Electronics (ICDCECE)</conf-name>. (<publisher-loc>Ballar, India</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>9</lpage>.</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Lim</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>E.</given-names>
</name>
<name>
<surname>White</surname> <given-names>L. B.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Rational-orthogonal-wavelet-based active sonar pulse and detector design</article-title>. <source>IEEE J. Oceanic Eng.</source> <volume>44</volume>, <fpage>167</fpage>&#x2013;<lpage>178</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JOE.2018.2801158</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A signal enhancement method based on the reverberation statistical information</article-title>. <source>EURASIP J. Adv. Signal Process.</source> <volume>2022</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s13634-022-00902-2</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhan</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Spatial fusion gan for image synthesis</article-title>. <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source> <volume>1</volume>, <fpage>3653</fpage>&#x2013;<lpage>3662</lpage>. doi: <pub-id pub-id-type="doi">10.1109/CVPR.2019.00377</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Wen</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Hou</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wen</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Digital automatic gain control design with large dynamic range in wireless communication receivers</article-title>,&#x201d; in <conf-name>2017 IEEE 17th International Conference on Communication Technology (ICCT)</conf-name>. (<publisher-loc>Chengdu, China</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1402</fpage>&#x2013;<lpage>1406</lpage>.</citation>
</ref>
<ref id="B53">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.-W.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>D.-D.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>J.-X.</given-names>
</name>
<name>
<surname>Zuo</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Weak moving target detection based on short-time fourier transform in sea clutter</article-title>,&#x201d; in <conf-name>2019 IEEE 4th International Conference on Signal and Image Processing (ICSIP)</conf-name>. (<publisher-loc>Wuxi, China</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>415</fpage>&#x2013;<lpage>419</lpage>.</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>a). <article-title>An omega-k algorithm for multireceiver synthetic aperture sonar</article-title>. <source>Electron. Lett.</source> <volume>59</volume>, <elocation-id>e12859</elocation-id>. doi: <pub-id pub-id-type="doi">10.1049/ell2.12859</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>b). <article-title>Multireceiver sas imagery with generalized pca</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>20</volume>. doi: <pub-id pub-id-type="doi">10.1109/LGRS.2023.3286180</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Late reverberation suppression using recurrent neural networks with long short-term memory</article-title>,&#x201d; in <conf-name>2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>. (<publisher-loc>Calgary, AB, Canada</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>5434</fpage>&#x2013;<lpage>5438</lpage>.</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Duan</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Robust shallow water reverberation reduction methods based on low-rank and sparsity decomposition</article-title>. <source>J. Acoustical Soc. America</source> <volume>151</volume>, <fpage>2826</fpage>&#x2013;<lpage>2842</lpage>. doi: <pub-id pub-id-type="doi">10.1121/10.0010353</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Improved support vectors machine for signal detection in non-reverberation</article-title>,&#x201d; in <conf-name>2008 IEEE Ultrasonics Symposium</conf-name>. (<publisher-loc>Beijing, China</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1508</fpage>&#x2013;<lpage>1511</lpage>.</citation>
</ref>
</ref-list>
</back>
</article>
