<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2024.1321549</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A learnable full-frequency transformer dual generative adversarial network for underwater image enhancement</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Zheng</surname><given-names>Shijian</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2507366"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname><given-names>Rujing</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zheng</surname><given-names>Shitao</given-names>
</name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname><given-names>Liusan</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>*</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liu</surname><given-names>Zhigui</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>*</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Intelligent Agriculture Engineering Laboratory of Anhui Province, Institute of Intelligent Machines, Hefei Institutes of Physical Science, Chinese Academy of Sciences</institution>, <addr-line>Hefei</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>School of Information Engineering, Southwest University of Science and Technology</institution>, <addr-line>Mianyang</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>School of Information Engineering, University of Science and Technology of China</institution>, <addr-line>Hefei</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>School of Computer Science and Technology, Xiamen University</institution>, <addr-line>Xiamen</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Philipp Friedrich Fischer, Alfred Wegener Institute Helmholtz Centre for Polar and Marine Research (AWI), Germany</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Xuebo Zhang, Northwest Normal University, China</p>
<p>Ning Wang, Dalian Maritime University, China</p>
<p>Dehuan Zhang, Dalian Maritime University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Liusan Wang, <email xlink:href="mailto:lswang@iim.ac.cn">lswang@iim.ac.cn</email>; Zhigui Liu, <email xlink:href="mailto:liuzhigui@swust.edu.cn">liuzhigui@swust.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>28</day>
<month>05</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>11</volume>
<elocation-id>1321549</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>10</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>06</day>
<month>05</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Zheng, Wang, Zheng, Wang and Liu</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Zheng, Wang, Zheng, Wang and Liu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Underwater applications present unique challenges such as color deviation, noise, and low contrast, which can degrade image quality. Addressing these issues, we propose a novel approach called the learnable full-frequency transformer dual generative adversarial network (LFT-DGAN). Our method comprises several key innovations. Firstly, we introduce a reversible convolution-based image decomposition technique. This method effectively separates underwater image information into low-, medium-, and high-frequency domains, enabling more thorough feature extraction. Secondly, we employ image channels and spatial similarity to construct a learnable full-frequency domain transformer. This transformer facilitates interaction between different branches of information, enhancing the overall image processing capabilities. Finally, we develop a robust dual-domain discriminator capable of learning spatial and frequency domain characteristics of underwater images. Extensive experimentation demonstrates the superiority of the LFT-DGAN method over state-of-the-art techniques across multiple underwater datasets. Our approach achieves significantly improved quality and evaluation metrics, showcasing its effectiveness in addressing the challenges posed by underwater imaging. The code can be found at <ext-link ext-link-type="uri" xlink:href="https://github.com/zhengshijian1993/LFT-DGAN">https://github.com/zhengshijian1993/LFT-DGAN</ext-link>.</p>
</abstract>
<kwd-group>
<kwd>dual generative adversarial network</kwd>
<kwd>reversible convolutional image decomposition</kwd>
<kwd>learnable full-frequency transformer</kwd>
<kwd>underwater image enhancement</kwd>
<kwd>frequency domain discriminator</kwd>
</kwd-group>
<counts>
<fig-count count="14"/>
<table-count count="5"/>
<equation-count count="20"/>
<ref-count count="48"/>
<page-count count="20"/>
<word-count count="8148"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Ocean Observation</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Underwater image enhancement is a complex and challenging endeavor aimed at enhancing the visual quality of underwater images to suit specific application scenarios. This technology finds extensive utility in domains like marine scientific research, underwater robotics, and underwater object recognition. Owing to the unique characteristics of the underwater environment, underwater images typically suffer from significant noise and color deviation, adding to the complexity of the enhancement process. Consequently, enhancing the quality of underwater images remains a daunting task, necessitating ongoing exploration and innovation to cater to the demands for high-quality underwater imagery across diverse application scenarios.</p>
<p>Traditional methods for underwater image enhancement often rely on manually designed features and shallow learning algorithms, which struggle to handle the inherent variability and complexity of underwater images. To tackle these challenges, recent research has shifted towards leveraging advanced deep learning techniques to enhance underwater image quality performance. Deep neural networks (<xref ref-type="bibr" rid="B44">Zhang et&#xa0;al., 2021a</xref>; <xref ref-type="bibr" rid="B18">Li et&#xa0;al., 2022</xref>) have exhibited remarkable capabilities in learning intricate patterns and representations directly from raw data, enabling them to adapt to the degraded and noisy nature of underwater images. One approach involves the direct development of complex deep network models, where researchers aim to create intricate deep structures to enhance underwater image quality. However, this approach often leads to issues of high model complexity. Another strategy explores leveraging characteristics from other domains to enhance images. This method allows for better processing of image details, structure, and frequency information, thereby improving clarity, contrast, and overall image quality. Although these methods have made strides in image enhancement, they sometimes overlook local feature differences. As illustrated in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>, frequency domain techniques are employed to investigate the issue of perceptual quality distortion in degraded underwater images alongside their corresponding authentic reference images. The task of image enhancement is carried out by computing the one-dimensional power spectrum information (indicative of image information quantity) between the images. This involves an analysis of frequency domain variability across the images. 
Upon closer examination of different stages depicted in the figure, it is evident that there exists a discernible variance in the frequency domain power spectrum values between the original underwater image and the reference image at each stage, as presented across the entire frequency spectrum. Hence, employing a frequency domain decoupling method to separately learn and approximate the authentic labels proves to be a highly rational approach. By addressing these challenges, researchers can pave the way for more effective underwater image enhancement techniques, meeting the demands of various application scenarios effectively.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Frequency domain difference analysis between the original image and the ground-truth image, with the amount of image information represented as a 1D power spectral density image. The calculation method of the 1D power spectrum image can be found in <xref ref-type="app" rid="app1_1"><bold>Appendix 1.1</bold></xref>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g001.tif"/>
</fig>
<p>To cope with the above problems, we propose a novel underwater image enhancement method using the learnable full-frequency transformer dual generative adversarial network (LFT-DGAN). Specifically, we first design an image frequency domain decomposition without natural information loss based on a reversible convolutional neural network structure. Reversible convolutional networks allow us to apply the advantageous high-frequency texture enhancement approach explicitly and separately to the high-frequency branch, which greatly alleviates the problem of frequency conflicts in the optimization objective. In addition, an interactive transformer structure has been designed to ensure improved information consistency and interactivity across multiple frequency bands. Finally, we develop an efficient and robust dual-domain discriminator to further ensure high-quality underwater image generation. The main innovations of this article are as follows:</p>
<list list-type="order">
<list-item>
<p>We present a novel approach to image frequency domain decomposition, implemented using reversible neural networks. This marks the first instance of introducing reversible neural networks to the domain of underwater image frequency domain decomposition.</p>
</list-item>
<list-item>
<p>We introduce an interactive transformer structure to ensure improved information consistency and interactivity across multiple frequency bands, further enhancing the quality of enhanced underwater images.</p>
</list-item>
<list-item>
<p>We have developed an efficient and robust dual-domain discriminator to further ensure high-quality underwater image generation. The proposed discriminator effectively distinguishes between real and generated images in both spatial and frequency domains, thereby aiding in achieving high-quality image enhancement results.</p>
</list-item>
<list-item>
<p>In comparison to current state-of-the-art methods, our approach yields satisfactory image effects across multiple underwater datasets. Additionally, we have performed several challenging applications, such as underwater image structure analysis, fog removal, and rain removal, demonstrating the superiority of our method.</p>
</list-item>
</list>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec id="s2_1">
<label>2.1</label>
<title>Underwater image enhancement methods</title>
<p>The development of underwater image enhancement methods has evolved from the traditional methods at the beginning to deep learning methods, which have received much attention from scholars. Traditional underwater image enhancement methods are mainly based on the redistribution of pixel intensities to improve image contrast and color without considering the specific characteristics of underwater images. It mainly includes spatial domain and transform domain image enhancement; spatial domain image enhancement is primarily based on gray mapping theory, and intensity histogram redistribution is achieved by expanding the gray level, which is done in different color models, such as the retinex theory (<xref ref-type="bibr" rid="B43">Zhang et&#xa0;al., 2022a</xref>) and histogram methods (<xref ref-type="bibr" rid="B9">Ghani and Isa, 2017</xref>). Transform domain image enhancement generally transforms the image into the frequency domain, enhancing the high-frequency components (target edges) and eliminating the low-frequency components (background) to improve the quality of underwater images, such as Laplacian pyramid (<xref ref-type="bibr" rid="B48">Zhuang et&#xa0;al., 2022</xref>) and wavelet transform (<xref ref-type="bibr" rid="B25">Ma and Oh, 2022</xref>). These traditional underwater image enhancement methods have the advantages of high flexibility and low computational requirements; however, the traditional methods require human intervention, have limited enhancement effects, and lack generalization (they cannot be applied to diverse underwater scenes).</p>
<p>With the rapid development of deep learning, many new methods for underwater image enhancement have made significant progress and overcome some limitations of traditional methods. For example, <xref ref-type="bibr" rid="B33">Wang et&#xa0;al. (2021)</xref> effectively integrate RGB color space and HSV color space into a single CNN model and propose the uiec2-net end-to-end trainable network, which achieves better results in underwater images, but the authors are limited to the spatial domain and do not explore whether the transform domain could also effectively improve underwater image enhancement. To validate this idea, <xref ref-type="bibr" rid="B36">Wang et&#xa0;al. (2022b)</xref> use an asymptotic frequency domain module and a convolutional bootstrap module to create an underwater image correction model. However, the aforementioned deep learning methods often require a large amount of running memory and computational resources, which is not conducive to real-time application on underwater devices. In order to reduce the number of parameters and improve computational efficiency, <xref ref-type="bibr" rid="B47">Zheng et&#xa0;al. (2023)</xref> propose a lightweight multi-scale underwater enhancement model. They achieve efficient underwater image enhancement by using a layer-wise attention mechanism and a compact network structure. In addition to these methods, scholars have proposed GAN algorithms to improve the quality of image perception for underwater image enhancement. <xref ref-type="bibr" rid="B31">Wang et&#xa0;al. (2023a)</xref> enhance underwater images by integrating a cascaded dense channel attention module and a position attention module into the generator in the GAN framework. <xref ref-type="bibr" rid="B19">Li et&#xa0;al. (2018b)</xref> use a circular structure to learn the mapping function between input and target and propose a weakly supervised method for color correction of underwater images. 
Although the above methods achieve promising results, their performance is dependent on the network architecture design and training data, and deep learning architectures for underwater images have some potential drawbacks, such as limited applicability of the trained model, blurring of some underwater image features, and over-emphasis on human visual quality.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Image decomposition methods</title>
<p>Underwater images mainly suffer from color distortion caused by light absorption and blurring caused by light scattering (<xref ref-type="bibr" rid="B32">Wang et&#xa0;al., 2023b</xref>). In terms of spatial domain, image enhancement is generally performed as a whole, ignoring the coherence between multiple degradations. Image decomposition methods can help represent an image as a collection of frequency components, such as wavelet transform and sparse representation. By processing these frequency components, we can gain a better understanding of the structure and features of the image, and isolate noise and blur components. This decomposition method can assist with enhancing underwater images by addressing various degradation issues. <xref ref-type="bibr" rid="B16">Kang et&#xa0;al. (2022)</xref> decompose underwater images into three conceptually independent components of average intensity, contrast and structure for processing and fusion to produce enhanced underwater images. <xref ref-type="bibr" rid="B37">Wu et&#xa0;al. (2021)</xref> decompose the underwater image into high- and low-frequency images, then process the high-frequency part using a deep learning method and the low-frequency part using a physical modeling method, and finally obtain the enhanced image.</p>
<p>Currently, the commonly used frequency domain decomposition methods include discrete Fourier transform (DFT), discrete wavelet transform (DWT), and discrete cosine transform (DCT). However, these methods are built based on mathematical approaches, which can cause different degrees of data loss phenomena and are not suitable for task-specific studies. For this reason, influenced by octave convolution (<xref ref-type="bibr" rid="B2">Chen et&#xa0;al., 2019</xref>), <xref ref-type="bibr" rid="B21">Li et&#xa0;al. (2021)</xref> use downsampling convolution to decompose mixed feature representations of images, and although they are able to decompose different frequency domain features, there is still unknown information loss due to the random loss of information features from convolution. In a similar operation, <xref ref-type="bibr" rid="B13">Jiang et&#xa0;al. (2023)</xref> use dilated convolution to decompose a mixed feature representation of the image, which also suffers from random loss of information (which cannot be quantified). The above studies show that the convolution approach is effective in decomposing image information, so what is the best way to do it without information loss? The reversible convolution approach has the advantage of low memory and no loss of information. For this work, we can refer to the cornerstone NICE (<xref ref-type="bibr" rid="B4">Dinh et&#xa0;al., 2014</xref>), which describes the entire framework and coupling layers in detail. Although this method is widely used in the areas of image scaling (<xref ref-type="bibr" rid="B39">Xiao et&#xa0;al., 2020</xref>), denoising (<xref ref-type="bibr" rid="B24">Liu et&#xa0;al., 2021</xref>), and hiding information (<xref ref-type="bibr" rid="B10">Guan et&#xa0;al., 2022</xref>), it has not yet been explored in the area of underwater image decomposition. 
For this reason, we use a reversible convolution approach to decompose image information, which allows for both no loss of image information and a complete decomposition of the image hybrid feature representation.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Proposed method</title>
<p>The LFT-DGAN framework consists of a generator network and two discriminator networks. The generator takes underwater images as input and produces enhanced images. The two discriminator networks, one operating at the frequency level and the other at the image level, provide feedback to improve the quality of the generated images. The training process involves an adversarial loss, perceptual loss, <italic>l</italic><sub>1</sub> loss, and gradient difference loss (GDL) to optimize the generator and discriminator networks, as shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>The overall network structure of LFT-DGAN, where <italic>D<sub>s</sub>
</italic> represents the image domain discriminator and <italic>D<sub>f</sub>
</italic> represents the frequency domain discriminator.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g002.tif"/>
</fig>
<sec id="s3_1">
<label>3.1</label>
<title>Generator network</title>
<p>The generator overall structure is shown in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>. The network includes a frequency domain decomposition module and a frequency domain enhancement module. Specifically, given the original underwater image <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:mtext>I</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mtext>R</mml:mtext>
<mml:mrow>
<mml:mtext>C</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>H</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>W</mml:mtext>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, our method first applies the frequency domain decomposition module to project the image into the low-frequency <italic>L</italic><sub>0</sub>, mid-frequency <italic>M</italic><sub>0</sub>, and high-frequency <italic>H</italic><sub>0</sub> components&#x2019; (i.e., full-frequency) feature space, and then the low-frequency feature <italic>L</italic><sub>0</sub> is used to extract the low-frequency effective feature <italic>L</italic><sub>1</sub> through a multi-level residual network (ResBlk), while the mid-frequency feature <italic>M</italic><sub>0</sub> and the feature <italic>L</italic><sub>1</sub> go through an interactive transformer structure (LFT-ResBlk) to obtain the mid-frequency effective feature <italic>M</italic><sub>1</sub>. Subsequently, the effective mid-frequency feature <italic>M</italic><sub>1</sub> and the high-frequency feature <italic>H</italic><sub>0</sub> through the LFT-ResBlk obtain the high-frequency effective feature <italic>H</italic><sub>1</sub>. The effective high-frequency (<italic>H</italic><sub>1</sub>), mid-frequency (<italic>M</italic><sub>1</sub>), and low-frequency (<italic>L</italic><sub>1</sub>) features are further propagated into densely connected neural network modules to construct clear underwater images <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:mtext>O</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msup>
<mml:mtext>R</mml:mtext>
<mml:mrow>
<mml:mtext>C</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>H</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>W</mml:mtext>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. The generator network process can be described as follows (<xref ref-type="disp-formula" rid="eq1">Equations 1</xref>&#x2013;<xref ref-type="disp-formula" rid="eq5">5</xref>):</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>The generator framework of our proposed LFT-DGAN method. For detailed explanations of the Decomposition and LFT modules in the figure, please refer to Sections 3.1.1 and 3.1.2.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g003.tif"/>
</fig>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>I</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the simplified forms of the frequency decomposition. <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> signifies a multi-level residual network (ResBlk). <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes an interactive transformer structure (LFT-ResBlk). <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:msub>
<mml:mo>&#x222b;</mml:mo>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> refers to a densely connected fusion network.</p>
<sec id="s3_1_1">
<label>3.1.1</label>
<title>Frequency decomposition module</title>
<p>In traditional signal processing methods, image transformation methods include fast Fourier transform (FFT), DCT, and DWT. These methods are deterministic mathematical operations and task-independent, and inevitably discard some key information for the recovery task. Both DFT and DCT characterize the entire spatial frequency domain of an image, which is not conducive to local information. DWT can represent the entire spatial frequency domain of an image and local spatial frequency domain features. In addition, scholars proposed dilated convolution filtering transform (DCFT) (<xref ref-type="bibr" rid="B13">Jiang et&#xa0;al., 2023</xref>) and up-/downsampling filtering transform (USFT) (<xref ref-type="bibr" rid="B21">Li et&#xa0;al., 2021</xref>), both of which are difficult to measure quantitatively as convolution or up-/downsampling will randomly lose high-frequency signals. <xref ref-type="fig" rid="f4"><bold>Figures&#xa0;4A&#x2013;C</bold></xref> show that different decomposition methods can separate low-, medium-, and high-frequency image differences to some extent, with the DWT being able to obtain better separation, while the convolution and up-/downsampling filter transforms designed by the researchers are able to separate image differences in high-frequency images, and our proposed decomposition method can obtain better separation in each frequency domain. In addition, <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4D</bold></xref> shows the low redundancy (correlation) of decomposed features by our proposed decomposition method, indicating that the image information is relatively thoroughly decomposed.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Analysis of the frequency domain difference between the original images (OI) and ground-truth images (GT) for different frequency domain decomposition methods. <bold>(A)</bold> Difference in the amount of low-frequency image information (1D power spectrum) between GT and OI. <bold>(B)</bold> Difference in the amount of information in the mid-frequency image between GT and OI. <bold>(C)</bold> Difference in the amount of high-frequency image information between GT and OI. <bold>(D)</bold> Correlation between low-, mid-, and high-frequency images for different frequency domain decomposition methods.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g004.tif"/>
</fig>
<p>To solve the problems of information integrity and flexibility of decomposed images, we propose an image decomposition method based on reversible convolution. If we make the potential representation of the image after the reversible network transformation separable, the different frequency signals will be encoded in different channels. As shown in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>, the input image goes through a reversible convolution module and produces two feature parts, <italic>Z</italic><sub>1</sub> and <italic>Z</italic><sub>2</sub>. The <italic>Z</italic><sub>1</sub> feature serves as the style extraction template for high-frequency features of the image. The <italic>Z</italic><sub>2</sub> feature continues to pass through another reversible convolution module, resulting in <italic>Z</italic><sub>3</sub> and <italic>Z</italic><sub>4</sub>. Similarly, the <italic>Z</italic><sub>4</sub> feature acts as the style extraction template for medium-frequency features of the image. The <italic>Z</italic><sub>3</sub> feature further goes through a reversible block to obtain the <italic>Z</italic><sub>5</sub> feature, which is used as the style extraction template for low-frequency features of the image. Finally, based on the distribution of these new style templates, the original image is redistributed. This process decomposes the original image into high-, medium-, and low-frequency images, with each frequency part influenced by the corresponding style extraction template. Note that the reversible convolution module (<xref ref-type="bibr" rid="B17">Kingma and Dhariwal, 2018</xref>) uses the regular stream mode. The frequency decomposition process can be described as follows (<xref ref-type="disp-formula" rid="eq6">Equations 6</xref>&#x2013;<xref ref-type="disp-formula" rid="eq9">9</xref>):</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Image decomposition method based on reversible convolution.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g005.tif"/>
</fig>
<disp-formula id="eq6">
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>B</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>I</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq7">
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>B</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq8">
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>5</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>B</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq9">
<label>(9)</label>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:mtext>low</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>mid</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>high</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>5</mml:mn>
</mml:msub>
<mml:mo>&#x2609;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
<mml:mo>&#x2609;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2609;</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>RB</italic> represents the reversible block. &#x2609; indicates the re-editing of images according to the new distribution.</p>
</sec>
<sec id="s3_1_2">
<label>3.1.2</label>
<title>Learnable full-frequency adaptive transformer</title>
<p>There are two key problems with the frequency domain hierarchical feature processing approach: (1) The amount of information in the low-frequency, mid-frequency, and high-frequency image features is significantly different, and a simple connection will greatly suppress the high-frequency information.</p>
<p>(2) Different branch enhancement modules will generate what they consider reasonable for their own bands (independent) and may not be consistent with other branch band enhancement content. To address these issues, we have designed a novel custom transformer structure technique that results in more realistic restoration results, as shown in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Learnable full-frequency adaptive transformer model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g006.tif"/>
</fig>
<p>For the problem of independence of the augmented content of different branches, we calculate the similarity between the <italic>L</italic><sub>1</sub> features and the <italic>M</italic><sub>0</sub> features to align the augmented content of each branch. Specifically, we first reshape one patch of <italic>L</italic><sub>1</sub> &#x2208; 1 &#xd7; 1 &#xd7; <italic>K</italic><sup>2</sup> into a <italic>K</italic> &#xd7; <italic>K</italic> &#xd7; 1 feature map, then take out the <italic>K</italic> &#xd7; <italic>K</italic> &#xd7; <italic>C</italic> size <italic>M</italic><sub>0</sub> features centered on the <italic>L</italic><sub>1</sub> feature location, and finally multiply the two features and rescale the result to the original feature size. Different patches are processed in turn to obtain each channel and spatial similarity content of the two branches.</p>
<p>For the problem of different sparsity of frequency information, we adopt a half instance normalization (HIN) (<xref ref-type="bibr" rid="B3">Chen et&#xa0;al., 2021</xref>) to retain structural information. The normalization method is first applied to the pre-modulation <italic>M</italic><sub>0</sub> features and post-modulation <italic>F</italic><sub>0</sub> features, then the <italic>&#x3b1;</italic> and <italic>&#x3b2;</italic> modulation parameters are obtained by two 3 &#xd7; 3 convolutions of <italic>F</italic><sub>0</sub> features, and finally the <italic>F</italic><sub>1</sub> features are modulated as follows (<xref ref-type="disp-formula" rid="eq10">Equation 10</xref>):</p>
<disp-formula id="eq10">
<label>(10)</label>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mfrac>
<mml:mo>+</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>H</italic> is the post-processing image features, and <italic>&#xb5;</italic> and <italic>&#x3b4;</italic> are means and variances of <italic>F</italic><sub>1</sub>, respectively.</p>
</sec>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Dual discriminator network</title>
<p>Most of the current discriminators of GAN-based underwater image enhancement methods mainly focus on image domain discrimination. However, the difference of image features in the frequency domain is often ignored. To solve this problem, we introduce a dual discriminator to more comprehensively discriminate the authenticity of images.</p>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Discriminator in the image domain</title>
<p>The discriminator requires greater discriminative power for complex training outputs. We have replaced the PatchGAN-style discriminator with a U-Net. The U-Net can obtain more detailed features, but increases the instability of training, and we introduce spectral normalization technology to solve this problem, as shown in <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref>. With these adjustments, we can achieve better training of the network.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Architecture of the U-Net discriminator with spectral normalization.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g007.tif"/>
</fig>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Discriminator in the frequency domain</title>
<p>Recent research has shown that there is a gap between the real and generated images in the frequency domain, leading to artifacts in the spatial domain. Based on this observation, we propose the use of a frequency domain discriminator to improve image quality. The ideas in this paper are mainly influenced by the conclusion that a one-dimensional representation of the Fourier power spectrum is sufficient to highlight the differences in the spectra, as proposed by <xref ref-type="bibr" rid="B15">Jung and Keuper (2021)</xref>. We transform the results of the Fourier transform into polar coordinates and calculate the azimuthal integral.</p>
<p>We propose a spectral discriminator using a full 2D power spectrum and a convolution structure, as in the original discriminator. Firstly, the proposed spectral discriminator takes as input a real or generated image and then calculates the magnitude of its Fourier transform through the spectral transform layer, which converts a 2D image into a 2D array of its spatial frequencies. Next, we calculate the magnitude of the 2D spectrum frequencies and integrate the resulting 2D array for each radius to obtain a one-dimensional profile of the power spectrum. Finally, in order to understand the differences in the higher-frequency bands, we feed the 1D spectral vectors into a high-pass filter and then apply the results to a spectral discriminator. The specific formula is described as follows (<xref ref-type="disp-formula" rid="eq11">Equations 11</xref>&#x2013;<xref ref-type="disp-formula" rid="eq13">13</xref>):</p>
<disp-formula id="eq11">
<label>(11)</label>
<mml:math display="block" id="M11">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>*</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>I</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq12">
<label>(12)</label>
<mml:math display="block" id="M12">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq13">
<label>(13)</label>
<mml:math display="block" id="M13">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
<mml:mi>p</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>&gt;</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>&#x3c4;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext>otherwise</mml:mtext>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>F</italic>(<italic>k, l</italic>) denotes calculation of the DFT on two-dimensional image data, in which <italic>k</italic> &#x2208; [0<italic>, M</italic> &#x2212; 1] and <italic>l</italic> &#x2208; [0<italic>, N</italic> &#x2212; 1]. <italic>AI</italic>(<italic>r</italic>) means the average intensity of the image signal about radial distance r. <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the high-pass filter. <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>&#x3c4;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a threshold radius for high-pass filtering.</p>
</sec>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Loss function</title>
<p>L1 loss (<italic>l</italic><sub>1</sub>): We use L1 loss on RGB pixels between the predicted <italic>J<sub>F</sub>
</italic> and ground-truth images <italic>J<sub>T</sub>
</italic>. The formula is as follows (<xref ref-type="disp-formula" rid="eq14">Equation 14</xref>):</p>
<disp-formula id="eq14">
<label>(14)</label>
<mml:math display="block" id="M14">
<mml:mrow>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mo>&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Perceptual loss <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>: We use perceptual loss (<xref ref-type="bibr" rid="B14">Johnson et&#xa0;al., 2016</xref>) to provide additional supervision in the high-level feature space. The formula is as follows (<xref ref-type="disp-formula" rid="eq15">Equation 15</xref>):</p>
<disp-formula id="eq15">
<label>(15)</label>
<mml:math display="block" id="M15">
<mml:mrow>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
<mml:mrow>
<mml:mo>&#xd8;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#xd8;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>&#x2016;</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:mo>&#xd8;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the feature maps obtained by the layers within the VGG16 network.</p>
<p>WGAN loss <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mi>G</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>G</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>: the WGAN-GP loss is adopted and modified into conditional setting as the adversarial loss. The formula is as follows (<xref ref-type="disp-formula" rid="eq16">Equation 16</xref>):</p>
<disp-formula id="eq16">
<label>(16)</label>
<mml:math display="block" id="M16">
<mml:mrow>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mi>G</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>G</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">^</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">^</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>J<sub>F</sub>
</italic> and <italic>J<sub>T</sub>
</italic> are the original raw image and the ground-truth underwater image, respectively, <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> are the samples along the lines between the generated images <italic>G</italic>(<italic>J<sub>F</sub>
</italic>) and <italic>J<sub>T</sub>
</italic>, and <italic>&#x3bb;</italic> stands for the weight factor.</p>
<p>GDL loss (<italic>l<sub>gdl</sub>
</italic>): We use the GDL function (<xref ref-type="bibr" rid="B6">Fabbri et&#xa0;al., 2018</xref>) by directly improving the generator of these predictions by penalizing the image gradient predictions. The formula is as follows (<xref ref-type="disp-formula" rid="eq17">Equation 17</xref>):</p>
<disp-formula id="eq17">
<label>(17)</label>
<mml:math display="block" id="M17">
<mml:mrow>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mover>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>F</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>F</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>F</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>F</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>J<sup>T</sup>
</italic> means a ground-truth image. <italic>J<sup>F</sup>
</italic> stands for predicted image.</p>
<p>The final combination loss is a linear combination of L1 loss, perceptual loss, WGAN loss, and GDL loss (<xref ref-type="disp-formula" rid="eq18">Equation 18</xref>):</p>
<disp-formula id="eq18">
<label>(18)</label>
<mml:math display="block" id="M18">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mtext>WGAN</mml:mtext>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>GP&#xa0;</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mtext>perceptual&#xa0;</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mtext>gdl&#xa0;</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>&#x3b1;</italic><sub>1</sub>, <italic>&#x3b1;</italic><sub>2</sub>, <italic>&#x3b1;</italic><sub>3</sub>, and <italic>&#x3b1;</italic><sub>4</sub> are determined through extensive experimental exploration, and we set <italic>&#x3b1;</italic><sub>1</sub> = 1, <italic>&#x3b1;</italic><sub>2</sub> = 10, <italic>&#x3b1;</italic><sub>3</sub> = 2, and <italic>&#x3b1;</italic><sub>4</sub> = 10.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental results</title>
<sec id="s4_1">
<label>4.1</label>
<title>Baseline methods</title>
<p>To demonstrate the superiority of our proposed method, we compare 10 advanced underwater image enhancement methods. In more detail, four representative traditional methods are selected for comparison, namely, ULAP (<xref ref-type="bibr" rid="B30">Song et&#xa0;al., 2018</xref>), UDCP (<xref ref-type="bibr" rid="B5">Drews et&#xa0;al., 2013</xref>), HLRP (<xref ref-type="bibr" rid="B48">Zhuang et&#xa0;al., 2022</xref>), and MLLE (<xref ref-type="bibr" rid="B46">Zhang et&#xa0;al., 2022b</xref>). Our method is also compared with six deep learning-based methods, namely, USLN (<xref ref-type="bibr" rid="B38">Xiao et&#xa0;al., 2022</xref>), URSCT (<xref ref-type="bibr" rid="B27">Ren et&#xa0;al., 2022</xref>), UDnet (<xref ref-type="bibr" rid="B29">Saleh et&#xa0;al., 2022</xref>), PUIE (<xref ref-type="bibr" rid="B8">Fu et&#xa0;al., 2022b</xref>), CWR (<xref ref-type="bibr" rid="B11">Han et&#xa0;al., 2021</xref>), and STSC (<xref ref-type="bibr" rid="B35">Wang et&#xa0;al., 2022a</xref>). All our experiments are conducted on an NVIDIA Titan RTX GPU (24 GB), 64-GB memory device, and the deep models use the Adam optimizer. The initial learning rate is 1e-2.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Dataset and evaluation metrics</title>
<p>To train our network, we utilize a dataset comprising 800 labeled images. These images were randomly drawn from the UIEB dataset (<xref ref-type="bibr" rid="B20">Li et&#xa0;al., 2019</xref>), which encompasses 890 pairs of underwater images captured across various scenes, exhibiting diverse quality and content. The reference image was selected from among 12 enhanced results by a panel of 50 volunteers.</p>
<p>For assessing our network&#x2019;s performance, we employ three widely recognized benchmark datasets: UIEB, UCCS (<xref ref-type="bibr" rid="B23">Liu et&#xa0;al., 2020</xref>), and UIQS (<xref ref-type="bibr" rid="B23">Liu et&#xa0;al., 2020</xref>). Among these, UCCS and UIQS lack reference images, while the UIEB dataset includes them. The UCCS dataset is used primarily for evaluating the efficacy of corrective color models and comprises three subsets, each containing 100 images exhibiting blue, green, and cyan tones, respectively. Meanwhile, the UIQS dataset is chiefly utilized to gauge the correction capabilities of models aimed at enhancing image visibility, featuring a subset with five distinct levels of image quality as measured by UCIQE.</p>
<p>We have used seven commonly used image quality evaluation metrics, namely, peak signal-to-noise ratio (PSNR), structural similarity (SSIM), underwater image quality metric (UIQM) (<xref ref-type="bibr" rid="B26">Panetta et&#xa0;al., 2015</xref>), underwater color image quality evaluation (UCIQE) (<xref ref-type="bibr" rid="B40">Yang and Sowmya, 2015</xref>), twice mixing (TM) (<xref ref-type="bibr" rid="B7">Fu et&#xa0;al., 2022a</xref>), a combination index of colorfulness, contrast, and fog density (CCF) (<xref ref-type="bibr" rid="B34">Wang et&#xa0;al., 2018</xref>), and entropy. The UIQM is an image quality evaluation index that comprehensively considers factors such as color, contrast, and clarity of underwater images. The UCIQE is a perceptual model based on color images that takes into account color distortion, contrast changes, and other factors to evaluate the quality of underwater color images by simulating the working mode of the human visual system. The TM evaluates image quality by using two blending ratios in the generation of training data and in the supervised training process. The CCF quantifies color loss, blurring, and fogginess, respectively. The entropy indicates the entropy value of the image.</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Color restoration on the UCCS dataset</title>
<list list-type="order">
<list-item>
<p>Qualitative comparisons: We evaluate the color correction capability of the algorithm on the UCCS dataset with three tones. <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref> shows the results of the different methods of processing; here, we focus on the ability of the algorithm to correct the colors. The UDCP method enhances the results with darker colors, where the results for the bluish-green dataset are relatively more realistic. The ULAP method is barely able to process the underwater images and only weakly recovers the results for the bluish dataset. The HLRP method is able to handle underwater images, but still suffers from color bias, and the MLLE method is the best of the traditional image enhancement methods, with some local over-enhancement, such as excessive brightness of stones in the image. The CWR method is a good solution to the problem of underwater color bias, especially in the greenish dataset, but there is an uneven color distribution in the bluish-green dataset. The PUIE method is able to obtain more balanced colors on the bluish-green dataset, but cannot handle the greenish dataset well. The STSC and URSCT methods give greenish and blurred results on both blue-green and greenish datasets. The UDnet method gives bluish results on the UCCS dataset with three tones. The images enhanced by the USLN method appear to be over-processed. Our proposed method gives more balanced and reasonable results in terms of color.</p>
</list-item>
<list-item>
<p>Quantitative comparisons: We use five evaluation indicators to further demonstrate the superiority of our approach to color correction. From the metric measures in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>, we can draw the following conclusions: (1) It shows that the traditional method enhancement results are not necessarily worse than the deep learning method enhancement processing; for example, the MLLE and HLRP methods show better scores in some metrics. (2) Based on the scores of the UCCS dataset processed by the various methods, the algorithm enhancement results show decreasing scores from the blue data subset to the green data subset. It indicates that the algorithm is able to handle the underwater blue bias problem excellently, but is slightly less capable of solving the green bias problem. (3) The enhanced results of our proposed method are able to obtain essentially optimal scores, and it can be observed that it is difficult to have a method that obtains optimal results on every indicator, probably due to the fact that the various evaluation indicators are biased towards one factor of the image.</p>
</list-item>
</list>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Visualization of the comparative results of the UCCS dataset. The results produced using the following methods: The three inputs are from the blue (first row), blue-green (second row), and green (third row) subsets of the <bold>(A)</bold> UCCS, <bold>(B)</bold> UDCP, <bold>(C)</bold> ULAP, <bold>(D)</bold> MLLE, <bold>(E)</bold> HLRP, <bold>(F)</bold> CWR, <bold>(G)</bold> PUIE, <bold>(H)</bold> STSC, <bold>(I)</bold> UDnet, <bold>(J)</bold> URSCT, <bold>(K)</bold> USLN, and <bold>(L)</bold> OURS data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g008.tif"/>
</fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Experimental results of different approaches to the UCCS dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Dataset</th>
<th valign="top" align="center">Method</th>
<th valign="top" align="center">UDCP</th>
<th valign="top" align="center">ULAP</th>
<th valign="top" align="center">HLRP</th>
<th valign="top" align="center">MLLE</th>
<th valign="top" align="center">USLN</th>
<th valign="top" align="center">UDnet</th>
<th valign="top" align="center">URSCT</th>
<th valign="top" align="center">PUIE</th>
<th valign="top" align="center">CWR</th>
<th valign="top" align="center">STSC</th>
<th valign="top" align="center">OURS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" rowspan="5" align="center">Blue</td>
<td valign="top" align="center">UIQM</td>
<td valign="top" align="center">0.2805</td>
<td valign="top" align="center">0.5986</td>
<td valign="top" align="center">0.7672</td>
<td valign="top" align="center">0.8762</td>
<td valign="top" align="center">0.6444</td>
<td valign="top" align="center">0.4774</td>
<td valign="top" align="center">0.6569</td>
<td valign="top" align="center">0.7011</td>
<td valign="top" align="center">0.8002</td>
<td valign="top" align="center">0.7170</td>
<td valign="top" align="center">0.7930</td>
</tr>
<tr>
<td valign="top" align="center">UCIQE</td>
<td valign="top" align="center">1.4839</td>
<td valign="top" align="center">2.6869</td>
<td valign="top" align="center">3.4340</td>
<td valign="top" align="center">2.2574</td>
<td valign="top" align="center">2.5893</td>
<td valign="top" align="center">2.0489</td>
<td valign="top" align="center">2.8328</td>
<td valign="top" align="center">2.7054</td>
<td valign="top" align="center">3.4420</td>
<td valign="top" align="center">2.8404</td>
<td valign="top" align="center">3.3026</td>
</tr>
<tr>
<td valign="top" align="center">CCF</td>
<td valign="top" align="center">15.8399</td>
<td valign="top" align="center">18.2907</td>
<td valign="top" align="center">27.0656</td>
<td valign="top" align="center">23.5719</td>
<td valign="top" align="center">25.2206</td>
<td valign="top" align="center">15.1000</td>
<td valign="top" align="center">24.7161</td>
<td valign="top" align="center">24.1712</td>
<td valign="top" align="center">22.7284</td>
<td valign="top" align="center">22.8724</td>
<td valign="top" align="center">30.6200</td>
</tr>
<tr>
<td valign="top" align="center">Entropy</td>
<td valign="top" align="center">6.9562</td>
<td valign="top" align="center">7.5203</td>
<td valign="top" align="center">7.4128</td>
<td valign="top" align="center">7.7734</td>
<td valign="top" align="center">7.5826</td>
<td valign="top" align="center">7.2552</td>
<td valign="top" align="center">7.5959</td>
<td valign="top" align="center">7.5974</td>
<td valign="top" align="center">7.7386</td>
<td valign="top" align="center">7.6037</td>
<td valign="top" align="center">7.7588</td>
</tr>
<tr>
<td valign="top" align="center">TM</td>
<td valign="top" align="center">0.0356</td>
<td valign="top" align="center">0.5561</td>
<td valign="top" align="center">0.5914</td>
<td valign="top" align="center">0.6146</td>
<td valign="top" align="center">0.9007</td>
<td valign="top" align="center">0.4936</td>
<td valign="top" align="center">1.0185</td>
<td valign="top" align="center">0.7491</td>
<td valign="top" align="center">0.4937</td>
<td valign="top" align="center">0.7466</td>
<td valign="top" align="center">1.0658</td>
</tr>
<tr>
<td valign="top" rowspan="5" align="center">Blue-green</td>
<td valign="top" align="center">UIQM</td>
<td valign="top" align="center">0.2177</td>
<td valign="top" align="center">0.1737</td>
<td valign="top" align="center">0.5721</td>
<td valign="top" align="center">0.6823</td>
<td valign="top" align="center">0.7168</td>
<td valign="top" align="center">0.2442</td>
<td valign="top" align="center">0.3822</td>
<td valign="top" align="center">0.5815</td>
<td valign="top" align="center">0.7161</td>
<td valign="top" align="center">0.6789</td>
<td valign="top" align="center">0.8241</td>
</tr>
<tr>
<td valign="top" align="center">UCIQE</td>
<td valign="top" align="center">1.2079</td>
<td valign="top" align="center">3.1600</td>
<td valign="top" align="center">2.3751</td>
<td valign="top" align="center">2.3677</td>
<td valign="top" align="center">3.1126</td>
<td valign="top" align="center">2.7901</td>
<td valign="top" align="center">2.9749</td>
<td valign="top" align="center">3.2026</td>
<td valign="top" align="center">4.3596</td>
<td valign="top" align="center">3.1216</td>
<td valign="top" align="center">3.7236</td>
</tr>
<tr>
<td valign="top" align="center">CCF</td>
<td valign="top" align="center">10.4546</td>
<td valign="top" align="center">19.7899</td>
<td valign="top" align="center">21.8464</td>
<td valign="top" align="center">14.4578</td>
<td valign="top" align="center">21.4152</td>
<td valign="top" align="center">12.2822</td>
<td valign="top" align="center">18.1140</td>
<td valign="top" align="center">18.2764</td>
<td valign="top" align="center">20.0089</td>
<td valign="top" align="center">18.0861</td>
<td valign="top" align="center">28.3831</td>
</tr>
<tr>
<td valign="top" align="center">Entropy</td>
<td valign="top" align="center">6.8138</td>
<td valign="top" align="center">7.2097</td>
<td valign="top" align="center">7.1199</td>
<td valign="top" align="center">7.6959</td>
<td valign="top" align="center">7.5539</td>
<td valign="top" align="center">6.9794</td>
<td valign="top" align="center">7.2003</td>
<td valign="top" align="center">7.3745</td>
<td valign="top" align="center">7.7378</td>
<td valign="top" align="center">7.4311</td>
<td valign="top" align="center">7.7878</td>
</tr>
<tr>
<td valign="top" align="center">TM</td>
<td valign="top" align="center">0.2577</td>
<td valign="top" align="center">0.5863</td>
<td valign="top" align="center">1.0404</td>
<td valign="top" align="center">0.8182</td>
<td valign="top" align="center">1.2999</td>
<td valign="top" align="center">1.4221</td>
<td valign="top" align="center">1.0813</td>
<td valign="top" align="center">0.8211</td>
<td valign="top" align="center">0.6626</td>
<td valign="top" align="center">0.9145</td>
<td valign="top" align="center">1.5907</td>
</tr>
<tr>
<td valign="top" rowspan="5" align="center">Green</td>
<td valign="top" align="center">UIQM</td>
<td valign="top" align="center">0.4904</td>
<td valign="top" align="center">0.3129</td>
<td valign="top" align="center">0.5201</td>
<td valign="top" align="center">0.6931</td>
<td valign="top" align="center">0.6696</td>
<td valign="top" align="center">0.3108</td>
<td valign="top" align="center">0.2266</td>
<td valign="top" align="center">0.5655</td>
<td valign="top" align="center">0.8625</td>
<td valign="top" align="center">0.6135</td>
<td valign="top" align="center">0.9274</td>
</tr>
<tr>
<td valign="top" align="center">UCIQE</td>
<td valign="top" align="center">1.1012</td>
<td valign="top" align="center">2.8052</td>
<td valign="top" align="center">2.2637</td>
<td valign="top" align="center">1.8285</td>
<td valign="top" align="center">2.5843</td>
<td valign="top" align="center">2.5745</td>
<td valign="top" align="center">2.6777</td>
<td valign="top" align="center">2.7592</td>
<td valign="top" align="center">4.0124</td>
<td valign="top" align="center">2.7567</td>
<td valign="top" align="center">3.9203</td>
</tr>
<tr>
<td valign="top" align="center">CCF</td>
<td valign="top" align="center">22.4462</td>
<td valign="top" align="center">15.4961</td>
<td valign="top" align="center">17.3230</td>
<td valign="top" align="center">27.0804</td>
<td valign="top" align="center">19.4656</td>
<td valign="top" align="center">11.4729</td>
<td valign="top" align="center">16.7603</td>
<td valign="top" align="center">19.9980</td>
<td valign="top" align="center">21.3270</td>
<td valign="top" align="center">15.7379</td>
<td valign="top" align="center">29.8022</td>
</tr>
<tr>
<td valign="top" align="center">Entropy</td>
<td valign="top" align="center">6.2752</td>
<td valign="top" align="center">6.8939</td>
<td valign="top" align="center">7.4152</td>
<td valign="top" align="center">7.7382</td>
<td valign="top" align="center">7.4024</td>
<td valign="top" align="center">6.7962</td>
<td valign="top" align="center">7.0914</td>
<td valign="top" align="center">7.2439</td>
<td valign="top" align="center">7.7248</td>
<td valign="top" align="center">7.2369</td>
<td valign="top" align="center">7.8478</td>
</tr>
<tr>
<td valign="top" align="center">TM</td>
<td valign="top" align="center">0.1202</td>
<td valign="top" align="center">0.0769</td>
<td valign="top" align="center">0.4269</td>
<td valign="top" align="center">0.3431</td>
<td valign="top" align="center">0.4711</td>
<td valign="top" align="center">0.1017</td>
<td valign="top" align="center">0.4245</td>
<td valign="top" align="center">0.3806</td>
<td valign="top" align="center">0.3221</td>
<td valign="top" align="center">0.2944</td>
<td valign="top" align="center">0.6809</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Red indicates the best result, and blue indicates the second best result.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Visibility comparisons on the UIQS dataset</title>
<list list-type="order">
<list-item>
<p>Qualitative comparisons: We evaluated the algorithm&#x2019;s enhanced contrast performance on the UIQS dataset at five levels of different visibility. <xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref> shows the different algorithm enhancement results. The UDCP and ULAP methods basically fail to enhance the contrast of underwater images, but ULAP gives better results for the A data subset of the UIQS dataset. The MLLE method gives excellent results in images of different contrast levels, but is not ideal for enhancing underwater images with particularly low contrast (subset E). The HLRP method also enhances underwater low contrast to some extent, but not as well as the MLLE method. The results of the STSC and URSCT methods of enhancement are yellowish. The CWR method and the USLN method give the best results for the C subset of images, but the contrast of the enhanced images in the D and E subsets is still low (greenish or bluish). The UDnet method results in low-contrast (bluish) enhancement. The PUIE method is able to solve the low-contrast problem better in the A subset of images, but as the contrast gets lower and lower, the processing becomes less and less effective. Our proposed method gives the best visualization results at all contrast levels.</p>
</list-item>
<list-item>
<p>Quantitative comparisons: We use five evaluation indicators to further demonstrate the ability of our method to achieve excellent scores on each level of contrast. From the metric measures in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, we can obtain similar conclusions to those obtained by correcting the color processing results of underwater images.</p>
</list-item>
</list>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Visualization of the comparison results for the UIQS dataset. Results generated using five inputs from subsets <bold>(A)</bold> (first row), <bold>(B)</bold> (second row), <bold>(C)</bold> (third row), <bold>(D)</bold> (fourth row), and <bold>(E)</bold> (fifth row) of the <bold>(A)</bold> UIQS, <bold>(B)</bold> UDCP, <bold>(C)</bold> ULAP, <bold>(D)</bold> MLLE, <bold>(E)</bold> HLRP, <bold>(F)</bold> CWR, <bold>(G)</bold> PUIE, <bold>(H)</bold> STSC, <bold>(I)</bold> UDnet, <bold>(J)</bold> URSCT, <bold>(K)</bold> USLN, and <bold>(L)</bold> OURS data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g009.tif"/>
</fig>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Experimental results of different approaches to the UIQS dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Dataset</th>
<th valign="top" align="center">Method</th>
<th valign="top" align="center">UDCP</th>
<th valign="top" align="center">ULAP</th>
<th valign="top" align="center">HLRP</th>
<th valign="top" align="center">MLLE</th>
<th valign="top" align="center">USLN</th>
<th valign="top" align="center">UDnet</th>
<th valign="top" align="center">URSCT</th>
<th valign="top" align="center">PUIE</th>
<th valign="top" align="center">CWR</th>
<th valign="top" align="center">STSC</th>
<th valign="top" align="center">OURS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" rowspan="5" align="center">A</td>
<td valign="top" align="center">UIQM</td>
<td valign="top" align="center">0.2624</td>
<td valign="top" align="center">0.6717</td>
<td valign="top" align="center">1.05</td>
<td valign="top" align="center">1.4177</td>
<td valign="top" align="center">0.8747</td>
<td valign="top" align="center">0.5917</td>
<td valign="top" align="center">0.8889</td>
<td valign="top" align="center">0.9126</td>
<td valign="top" align="center">0.9605</td>
<td valign="top" align="center">0.9142</td>
<td valign="top" align="center">1.0072</td>
</tr>
<tr>
<td valign="top" align="center">UCIQE</td>
<td valign="top" align="center">1.6184</td>
<td valign="top" align="center">1.4487</td>
<td valign="top" align="center">3.4037</td>
<td valign="top" align="center">2.5668</td>
<td valign="top" align="center">3.1534</td>
<td valign="top" align="center">2.2996</td>
<td valign="top" align="center">3.2995</td>
<td valign="top" align="center">3.4283</td>
<td valign="top" align="center">4.2247</td>
<td valign="top" align="center">3.4761</td>
<td valign="top" align="center">3.7073</td>
</tr>
<tr>
<td valign="top" align="center">CCF</td>
<td valign="top" align="center">18.706</td>
<td valign="top" align="center">11.5649</td>
<td valign="top" align="center">26.8382</td>
<td valign="top" align="center">22.51</td>
<td valign="top" align="center">29.7624</td>
<td valign="top" align="center">17.8651</td>
<td valign="top" align="center">29.5053</td>
<td valign="top" align="center">29.4401</td>
<td valign="top" align="center">26.7736</td>
<td valign="top" align="center">28.0359</td>
<td valign="top" align="center">34.3478</td>
</tr>
<tr>
<td valign="top" align="center">Entropy</td>
<td valign="top" align="center">7.0386</td>
<td valign="top" align="center">7.4441</td>
<td valign="top" align="center">7.3737</td>
<td valign="top" align="center">7.7582</td>
<td valign="top" align="center">7.6527</td>
<td valign="top" align="center">7.2168</td>
<td valign="top" align="center">7.6075</td>
<td valign="top" align="center">7.6617</td>
<td valign="top" align="center">7.7869</td>
<td valign="top" align="center">7.6721</td>
<td valign="top" align="center">7.8197</td>
</tr>
<tr>
<td valign="top" align="center">TM</td>
<td valign="top" align="center">0.3725</td>
<td valign="top" align="center">0.7422</td>
<td valign="top" align="center">0.7124</td>
<td valign="top" align="center">0.8398</td>
<td valign="top" align="center">1.1837</td>
<td valign="top" align="center">0.555</td>
<td valign="top" align="center">1.2481</td>
<td valign="top" align="center">0.9335</td>
<td valign="top" align="center">0.6238</td>
<td valign="top" align="center">0.9442</td>
<td valign="top" align="center">1.3742</td>
</tr>
<tr>
<td valign="top" rowspan="5" align="center">B</td>
<td valign="top" align="center">UIQM</td>
<td valign="top" align="center">0.1347</td>
<td valign="top" align="center">0.2245</td>
<td valign="top" align="center">1.1408</td>
<td valign="top" align="center">1.3351</td>
<td valign="top" align="center">0.8352</td>
<td valign="top" align="center">0.463</td>
<td valign="top" align="center">0.7377</td>
<td valign="top" align="center">0.7794</td>
<td valign="top" align="center">0.9785</td>
<td valign="top" align="center">0.8149</td>
<td valign="top" align="center">1.0115</td>
</tr>
<tr>
<td valign="top" align="center">UCIQE</td>
<td valign="top" align="center">1.4018</td>
<td valign="top" align="center">2.0279</td>
<td valign="top" align="center">2.8489</td>
<td valign="top" align="center">2.1675</td>
<td valign="top" align="center">2.7157</td>
<td valign="top" align="center">2.4606</td>
<td valign="top" align="center">2.6478</td>
<td valign="top" align="center">2.9528</td>
<td valign="top" align="center">4.0891</td>
<td valign="top" align="center">2.9368</td>
<td valign="top" align="center">3.5109</td>
</tr>
<tr>
<td valign="top" align="center">CCF</td>
<td valign="top" align="center">10.8455</td>
<td valign="top" align="center">10.1708</td>
<td valign="top" align="center">21.8436</td>
<td valign="top" align="center">24.8785</td>
<td valign="top" align="center">24.5079</td>
<td valign="top" align="center">14.6972</td>
<td valign="top" align="center">22.0912</td>
<td valign="top" align="center">24.9349</td>
<td valign="top" align="center">25.7268</td>
<td valign="top" align="center">21.1953</td>
<td valign="top" align="center">32.0104</td>
</tr>
<tr>
<td valign="top" align="center">Entropy</td>
<td valign="top" align="center">6.7771</td>
<td valign="top" align="center">7.22</td>
<td valign="top" align="center">7.5254</td>
<td valign="top" align="center">7.7461</td>
<td valign="top" align="center">7.5398</td>
<td valign="top" align="center">7.0236</td>
<td valign="top" align="center">7.3609</td>
<td valign="top" align="center">7.4704</td>
<td valign="top" align="center">7.7696</td>
<td valign="top" align="center">7.468</td>
<td valign="top" align="center">7.8019</td>
</tr>
<tr>
<td valign="top" align="center">TM</td>
<td valign="top" align="center">0.1826</td>
<td valign="top" align="center">0.4765</td>
<td valign="top" align="center">0.7246</td>
<td valign="top" align="center">0.7241</td>
<td valign="top" align="center">1.0577</td>
<td valign="top" align="center">0.3565</td>
<td valign="top" align="center">1.0057</td>
<td valign="top" align="center">0.759</td>
<td valign="top" align="center">0.6269</td>
<td valign="top" align="center">0.7683</td>
<td valign="top" align="center">1.2991</td>
</tr>
<tr>
<td valign="top" rowspan="5" align="center">C</td>
<td valign="top" align="center">UIQM</td>
<td valign="top" align="center">0.1908</td>
<td valign="top" align="center">0.0939</td>
<td valign="top" align="center">1.1052</td>
<td valign="top" align="center">1.2566</td>
<td valign="top" align="center">0.7507</td>
<td valign="top" align="center">0.3883</td>
<td valign="top" align="center">0.6113</td>
<td valign="top" align="center">0.6932</td>
<td valign="top" align="center">0.9041</td>
<td valign="top" align="center">0.7502</td>
<td valign="top" align="center">0.972</td>
</tr>
<tr>
<td valign="top" align="center">UCIQE</td>
<td valign="top" align="center">2.4367</td>
<td valign="top" align="center">2.0319</td>
<td valign="top" align="center">2.677</td>
<td valign="top" align="center">2.2374</td>
<td valign="top" align="center">2.8143</td>
<td valign="top" align="center">2.6215</td>
<td valign="top" align="center">2.7135</td>
<td valign="top" align="center">3.0184</td>
<td valign="top" align="center">4.2389</td>
<td valign="top" align="center">2.9981</td>
<td valign="top" align="center">3.6102</td>
</tr>
<tr>
<td valign="top" align="center">CCF</td>
<td valign="top" align="center">18.5137</td>
<td valign="top" align="center">14.9624</td>
<td valign="top" align="center">24.1595</td>
<td valign="top" align="center">21.5271</td>
<td valign="top" align="center">22.2052</td>
<td valign="top" align="center">13.3821</td>
<td valign="top" align="center">20.0343</td>
<td valign="top" align="center">22.3457</td>
<td valign="top" align="center">23.5882</td>
<td valign="top" align="center">19.3187</td>
<td valign="top" align="center">30.3306</td>
</tr>
<tr>
<td valign="top" align="center">Entropy</td>
<td valign="top" align="center">6.6623</td>
<td valign="top" align="center">7.1361</td>
<td valign="top" align="center">7.394</td>
<td valign="top" align="center">7.7096</td>
<td valign="top" align="center">7.4867</td>
<td valign="top" align="center">6.9612</td>
<td valign="top" align="center">7.2692</td>
<td valign="top" align="center">7.3961</td>
<td valign="top" align="center">7.7445</td>
<td valign="top" align="center">7.4098</td>
<td valign="top" align="center">7.7922</td>
</tr>
<tr>
<td valign="top" align="center">TM</td>
<td valign="top" align="center">0.1628</td>
<td valign="top" align="center">0.4364</td>
<td valign="top" align="center">0.6945</td>
<td valign="top" align="center">0.6243</td>
<td valign="top" align="center">0.9577</td>
<td valign="top" align="center">0.3255</td>
<td valign="top" align="center">1.0412</td>
<td valign="top" align="center">0.7392</td>
<td valign="top" align="center">0.5249</td>
<td valign="top" align="center">0.7386</td>
<td valign="top" align="center">1.1891</td>
</tr>
<tr>
<td valign="top" rowspan="5" align="center">D</td>
<td valign="top" align="center">UIQM</td>
<td valign="top" align="center">0.2864</td>
<td valign="top" align="center">0.1065</td>
<td valign="top" align="center">0.9749</td>
<td valign="top" align="center">1.0247</td>
<td valign="top" align="center">0.6074</td>
<td valign="top" align="center">0.2521</td>
<td valign="top" align="center">0.3638</td>
<td valign="top" align="center">0.5279</td>
<td valign="top" align="center">0.7381</td>
<td valign="top" align="center">0.5838</td>
<td valign="top" align="center">0.836</td>
</tr>
<tr>
<td valign="top" align="center">UCIQE</td>
<td valign="top" align="center">1.0903</td>
<td valign="top" align="center">1.8212</td>
<td valign="top" align="center">2.2942</td>
<td valign="top" align="center">2.2026</td>
<td valign="top" align="center">2.9106</td>
<td valign="top" align="center">2.9257</td>
<td valign="top" align="center">2.7073</td>
<td valign="top" align="center">2.9997</td>
<td valign="top" align="center">4.4156</td>
<td valign="top" align="center">2.9333</td>
<td valign="top" align="center">3.7216</td>
</tr>
<tr>
<td valign="top" align="center">CCF</td>
<td valign="top" align="center">25.7091</td>
<td valign="top" align="center">16.9191</td>
<td valign="top" align="center">26.6111</td>
<td valign="top" align="center">23.5912</td>
<td valign="top" align="center">18.8959</td>
<td valign="top" align="center">10.8657</td>
<td valign="top" align="center">16.4595</td>
<td valign="top" align="center">17.7478</td>
<td valign="top" align="center">19.3404</td>
<td valign="top" align="center">15.0257</td>
<td valign="top" align="center">26.4707</td>
</tr>
<tr>
<td valign="top" align="center">Entropy</td>
<td valign="top" align="center">6.4785</td>
<td valign="top" align="center">7.0627</td>
<td valign="top" align="center">7.152</td>
<td valign="top" align="center">7.6602</td>
<td valign="top" align="center">7.4379</td>
<td valign="top" align="center">6.8769</td>
<td valign="top" align="center">7.1504</td>
<td valign="top" align="center">7.2836</td>
<td valign="top" align="center">7.6938</td>
<td valign="top" align="center">7.2796</td>
<td valign="top" align="center">7.7715</td>
</tr>
<tr>
<td valign="top" align="center">TM</td>
<td valign="top" align="center">0.0832</td>
<td valign="top" align="center">0.2562</td>
<td valign="top" align="center">0.5543</td>
<td valign="top" align="center">0.4671</td>
<td valign="top" align="center">0.7086</td>
<td valign="top" align="center">0.2208</td>
<td valign="top" align="center">0.619</td>
<td valign="top" align="center">0.5202</td>
<td valign="top" align="center">0.4504</td>
<td valign="top" align="center">0.5084</td>
<td valign="top" align="center">0.9437</td>
</tr>
<tr>
<td valign="top" rowspan="5" align="center">E</td>
<td valign="top" align="center">UIQM</td>
<td valign="top" align="center">0.2966</td>
<td valign="top" align="center">0.0279</td>
<td valign="top" align="center">0.9249</td>
<td valign="top" align="center">0.9727</td>
<td valign="top" align="center">0.5608</td>
<td valign="top" align="center">0.2272</td>
<td valign="top" align="center">0.4277</td>
<td valign="top" align="center">0.5196</td>
<td valign="top" align="center">0.6743</td>
<td valign="top" align="center">0.5610</td>
<td valign="top" align="center">0.7863</td>
</tr>
<tr>
<td valign="top" align="center">UCIQE</td>
<td valign="top" align="center">1.5435</td>
<td valign="top" align="center">2.9118</td>
<td valign="top" align="center">2.7188</td>
<td valign="top" align="center">2.3697</td>
<td valign="top" align="center">3.0641</td>
<td valign="top" align="center">3.0453</td>
<td valign="top" align="center">2.6687</td>
<td valign="top" align="center">3.1068</td>
<td valign="top" align="center">4.6266</td>
<td valign="top" align="center">3.1638</td>
<td valign="top" align="center">3.8615</td>
</tr>
<tr>
<td valign="top" align="center">CCF</td>
<td valign="top" align="center">16.0148</td>
<td valign="top" align="center">18.6792</td>
<td valign="top" align="center">18.9914</td>
<td valign="top" align="center">22.2963</td>
<td valign="top" align="center">18.3614</td>
<td valign="top" align="center">10.6133</td>
<td valign="top" align="center">16.3301</td>
<td valign="top" align="center">17.0854</td>
<td valign="top" align="center">18.3641</td>
<td valign="top" align="center">14.6125</td>
<td valign="top" align="center">24.7637</td>
</tr>
<tr>
<td valign="top" align="center">Entropy</td>
<td valign="top" align="center">6.4374</td>
<td valign="top" align="center">7.1444</td>
<td valign="top" align="center">6.9743</td>
<td valign="top" align="center">7.6286</td>
<td valign="top" align="center">7.4475</td>
<td valign="top" align="center">6.9092</td>
<td valign="top" align="center">7.2046</td>
<td valign="top" align="center">7.3128</td>
<td valign="top" align="center">7.6756</td>
<td valign="top" align="center">7.3092</td>
<td valign="top" align="center">7.7418</td>
</tr>
<tr>
<td valign="top" align="center">TM</td>
<td valign="top" align="center">0.117</td>
<td valign="top" align="center">0.3058</td>
<td valign="top" align="center">0.5612</td>
<td valign="top" align="center">0.467</td>
<td valign="top" align="center">0.7063</td>
<td valign="top" align="center">0.2698</td>
<td valign="top" align="center">0.6534</td>
<td valign="top" align="center">0.5383</td>
<td valign="top" align="center">0.4562</td>
<td valign="top" align="center">0.5289</td>
<td valign="top" align="center">0.9199</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Red indicates the best result, and blue indicates the second best result.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Firstly, the traditional method of enhancement was able to obtain images that scored as well as the deep learning enhanced images. Secondly, as the contrast of the underwater images decreases, the scores of the algorithm-enhanced results for each of the image metrics also decrease. Finally, the enhanced results of our proposed method are able to obtain excellent scores in most metrics.</p>
</sec>
<sec id="s4_5">
<label>4.5</label>
<title>Comprehensive comparisons on the UIEB dataset</title>
<list list-type="order">
<list-item>
<p>Qualitative comparisons: We comprehensively evaluate the superiority of our proposed method on different degraded underwater image datasets (blur, low visibility, low light, color shift, etc.). <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref> shows several visual results where the enhanced results of the UDCP and ULAP methods still do not resolve the blue and green bias, and cause color shifts in other colors. The MLLE method over-enhances the results and causes a slight color cast, while the HLRP method is blurrier for border enhancement and does not enhance darker areas as well. The CWR method enhances the color and contrast of underwater images, but with localized brightness overload (first row). The USLN method is able to resolve the underwater color cast, but some images are over-colored (second row). The UDnet method of enhancement results in a blue bias. Both the PUIE and STSC methods solve the problem of underwater color cast, with the STSC method enhancing the high contrast of the results. The URSCT method enhanced the results more satisfactorily. In terms of color and contrast analysis, we can observe that the proposed method enhances the results visually better.</p>
</list-item>
<list-item>
<p>Quantitative comparisons: We also quantitatively assessed the ability of these methods to address different degraded data through seven metrics. <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref> shows the average quantitative scores of the algorithms across the UIEB dataset. Our method has higher PSNR, SSIM, UIQM, UCIQE, CCF, Entropy, and TM values compared to the comparative method. The results show that our method generally produces pleasing visual effects.</p>
</list-item>
</list>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Visualization of the comparison results for the UIEB dataset. Results generated using the inputs of the <bold>(A)</bold> UIEB, <bold>(B)</bold> UDCP, <bold>(C)</bold> ULAP, <bold>(D)</bold> MLLE, <bold>(E)</bold> HLRP, <bold>(F)</bold> CWR, <bold>(G)</bold> PUIE, <bold>(H)</bold> STSC, <bold>(I)</bold> UDnet, <bold>(J)</bold> URSCT, <bold>(K)</bold> USLN, and <bold>(L)</bold> OURS data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g010.tif"/>
</fig>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Experimental results of different approaches to the UIEB dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Method</th>
<th valign="top" align="center">UDCP</th>
<th valign="top" align="center">ULAP</th>
<th valign="top" align="center">HLRP</th>
<th valign="top" align="center">MLLE</th>
<th valign="top" align="center">USLN</th>
<th valign="top" align="center">UDnet</th>
<th valign="top" align="center">URSCT</th>
<th valign="top" align="center">PUIE</th>
<th valign="top" align="center">CWR</th>
<th valign="top" align="center">STSC</th>
<th valign="top" align="center">OURS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">PSNR</td>
<td valign="top" align="center">11.7997</td>
<td valign="top" align="center">19.4783</td>
<td valign="top" align="center">15.6763</td>
<td valign="top" align="center">21.1899</td>
<td valign="top" align="center">25.545</td>
<td valign="top" align="center">20.3741</td>
<td valign="top" align="center">22.194</td>
<td valign="top" align="center">24.0521</td>
<td valign="top" align="center">24.3758</td>
<td valign="top" align="center">21.1268</td>
<td valign="top" align="center">24.4591</td>
</tr>
<tr>
<td valign="top" align="center">SSIM</td>
<td valign="top" align="center">0.5216</td>
<td valign="top" align="center">0.7582</td>
<td valign="top" align="center">0.2931</td>
<td valign="top" align="center">0.8208</td>
<td valign="top" align="center">0.8948</td>
<td valign="top" align="center">0.7651</td>
<td valign="top" align="center">0.8608</td>
<td valign="top" align="center">0.9057</td>
<td valign="top" align="center">0.8614</td>
<td valign="top" align="center">0.7327</td>
<td valign="top" align="center">0.8284</td>
</tr>
<tr>
<td valign="top" align="center">UIQM</td>
<td valign="top" align="center">0.3004</td>
<td valign="top" align="center">0.739</td>
<td valign="top" align="center">0.8426</td>
<td valign="top" align="center">0.9154</td>
<td valign="top" align="center">0.7696</td>
<td valign="top" align="center">0.4733</td>
<td valign="top" align="center">0.8778</td>
<td valign="top" align="center">0.7582</td>
<td valign="top" align="center">0.7617</td>
<td valign="top" align="center">0.8820</td>
<td valign="top" align="center">0.9618</td>
</tr>
<tr>
<td valign="top" align="center">UCIQE</td>
<td valign="top" align="center">2.3597</td>
<td valign="top" align="center">2.7313</td>
<td valign="top" align="center">4.6963</td>
<td valign="top" align="center">3.4436</td>
<td valign="top" align="center">4.6444</td>
<td valign="top" align="center">2.7125</td>
<td valign="top" align="center">4.5149</td>
<td valign="top" align="center">4.248</td>
<td valign="top" align="center">4.8607</td>
<td valign="top" align="center">4.6332</td>
<td valign="top" align="center">5.2957</td>
</tr>
<tr>
<td valign="top" align="center">CCF</td>
<td valign="top" align="center">15.1024</td>
<td valign="top" align="center">20.8028</td>
<td valign="top" align="center">23.8784</td>
<td valign="top" align="center">26.6497</td>
<td valign="top" align="center">29.8479</td>
<td valign="top" align="center">14.5760</td>
<td valign="top" align="center">29.4049</td>
<td valign="top" align="center">23.8073</td>
<td valign="top" align="center">23.9543</td>
<td valign="top" align="center">27.0381</td>
<td valign="top" align="center">34.9749</td>
</tr>
<tr>
<td valign="top" align="center">Entropy</td>
<td valign="top" align="center">6.6726</td>
<td valign="top" align="center">7.3559</td>
<td valign="top" align="center">7.3946</td>
<td valign="top" align="center">7.6831</td>
<td valign="top" align="center">7.6076</td>
<td valign="top" align="center">7.1623</td>
<td valign="top" align="center">7.6649</td>
<td valign="top" align="center">7.6463</td>
<td valign="top" align="center">7.6660</td>
<td valign="top" align="center">7.6716</td>
<td valign="top" align="center">7.8017</td>
</tr>
<tr>
<td valign="top" align="center">TM</td>
<td valign="top" align="center">0.6095</td>
<td valign="top" align="center">0.8413</td>
<td valign="top" align="center">0.8714</td>
<td valign="top" align="center">0.9172</td>
<td valign="top" align="center">2.3716</td>
<td valign="top" align="center">0.6792</td>
<td valign="top" align="center">2.4479</td>
<td valign="top" align="center">1.3374</td>
<td valign="top" align="center">1.0579</td>
<td valign="top" align="center">1.2415</td>
<td valign="top" align="center">2.1803</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Red indicates the best result, and blue indicates the second best result.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s4_6">
<label>4.6</label>
<title>Comparisons of detail enhancement</title>
<p>We observe the superiority of the algorithm by enhancing the local magnification of the image for each algorithm. <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref> shows the local features of the images, and it can be seen that the UDCP, ULAP, and UDnet methods enhance the results with color bias and low contrast. The STSC, URSCT, and USLN methods give slightly greenish results; the MLLE and HLRP methods give results as good as the deep learning enhancement; the PUIE method gives low contrast; and the CWR method gives high contrast-enhanced images. The enhanced results of our method give better color and contrast.</p>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>Local visualization comparison results for selected UIEB datasets. Results generated using the inputs of the <bold>(A)</bold> UIEB, <bold>(B)</bold> UDCP, <bold>(C)</bold> ULAP, <bold>(D)</bold> MLLE, <bold>(E)</bold> HLRP, <bold>(F)</bold> CWR, <bold>(G)</bold> PUIE, <bold>(H)</bold> STSC, <bold>(I)</bold> UDnet, <bold>(J)</bold> URSCT, <bold>(K)</bold> USLN, and <bold>(L)</bold> OURS data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g011.tif"/>
</fig>
</sec>
<sec id="s4_7">
<label>4.7</label>
<title>Structural enhancement comparison</title>
<p>To demonstrate the image structure enhancement effect of our proposed method, the number of visibility edges recovered in the blind contrast enhancement assessment (<xref ref-type="bibr" rid="B12">Hautiere et&#xa0;al., 2008</xref>) was measured, as shown in <xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref>. We can observe the following phenomena: (1) The proposed method is able to obtain a higher number of recovered visible edges, confirming its effectiveness in terms of sharpness and contrast enhancement. (2) The enhancement results of the traditional methods are comparable to those of the deep learning methods (HLRP and MLLE methods) and even surpass those of the UDnet method. (3) Although our enhancement method obtains the maximum number of visible edges, it also enhances many background ambient edges and thereby blurs the target edges, which may not be beneficial for advanced visual processing.</p>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>Comparison results for structural enhancement. The red numbers indicate the number of visible edges recovered by the algorithm. The inputs of the <bold>(A)</bold> UIEB, <bold>(B)</bold> UDCP, <bold>(C)</bold> ULAP, <bold>(D)</bold> MLLE, <bold>(E)</bold> HLRP, <bold>(F)</bold> CWR, <bold>(G)</bold> PUIE, <bold>(H)</bold> STSC, <bold>(I)</bold> UDnet, <bold>(J)</bold> URSCT, <bold>(K)</bold> USLN, and <bold>(L)</bold> OURS data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g012.tif"/>
</fig>
</sec>
<sec id="s4_8">
<label>4.8</label>
<title>Ablation study</title>
<p>We perform ablation experiments on the UCCS, UIQS, and UIEB datasets to illustrate the effectiveness of each component of our approach. The main components include (a) our method without frequency decomposition module (-w/o FD); here, we use the conventional DFT approach as a benchmark; (b) our method without learnable full-frequency transformer module (-w/o LFT); here, we use the regular multi-headed transformer structure as a benchmark; (c) our method without discriminator in the frequency domain (-w/o DFD); and (d) our method with LFT-DGAN.</p>
<p>
<xref ref-type="fig" rid="f13"><bold>Figure&#xa0;13</bold></xref> shows the visualization results for the UCCS, UIQS, and UIEB datasets. From the visualization results, it can be observed that (1) OURS-w/o FD is able to correct the color of underwater images, but the detail texture is blurred; (2) OURS-w/o LFT is able to recover image texture details but does not work well for color correction and contrast; (3) OURS-w/o DFD can solve the underwater image color and contrast problems well, but the image colors are dark; and (4) our proposed method can further enhance the contrast and color of underwater images.</p>
<fig id="f13" position="float">
<label>Figure&#xa0;13</label>
<caption>
<p>Ablation visualization results for each module in the model in the UCCS, UIQS, and UIEB datasets.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g013.tif"/>
</fig>
<p>As shown in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>, we quantitatively evaluate the scores for each module of the proposed algorithm, where the UCCS dataset is the average of three subsets and the UIQS dataset is the average of five subsets. It can be seen that each of our proposed modules plays a role in the LFT-DGAN algorithm and that the LFT-DGAN algorithm was able to obtain the best scores.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Quantitative ablation experiments on the UCCS, UIQS, and UIEB datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Method</th>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center">UIEB</th>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center">UCCS</th>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
<th valign="top" align="center">UIQS</th>
<th valign="top" align="center"/>
<th valign="top" align="center"/>
</tr>
<tr>
<th valign="top" align="center">PSNR</th>
<th valign="top" align="center">SSIM</th>
<th valign="top" align="center">UIQM</th>
<th valign="top" align="center">UCIQE</th>
<th valign="top" align="center">CCF</th>
<th valign="top" align="center">Entropy</th>
<th valign="top" align="center">TM</th>
<th valign="top" align="center">UIQM</th>
<th valign="top" align="center">UCIQE</th>
<th valign="top" align="center">CCF</th>
<th valign="top" align="center">Entropy</th>
<th valign="top" align="center">TM</th>
<th valign="top" align="center">UIQM</th>
<th valign="top" align="center">UCIQE</th>
<th valign="top" align="center">CCF</th>
<th valign="top" align="center">Entropy</th>
<th valign="top" align="center">TM</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">w/o FD</td>
<td valign="top" align="center">20.4302</td>
<td valign="top" align="center">0.7049</td>
<td valign="top" align="center">0.9029</td>
<td valign="top" align="center">3.0997</td>
<td valign="top" align="center">31.2226</td>
<td valign="top" align="center">7.7328</td>
<td valign="top" align="center">1.1431</td>
<td valign="top" align="center">0.9862</td>
<td valign="top" align="center">2.1330</td>
<td valign="top" align="center">27.9388</td>
<td valign="top" align="center">7.7000</td>
<td valign="top" align="center">0.8197</td>
<td valign="top" align="center">1.0971</td>
<td valign="top" align="center">1.9652</td>
<td valign="top" align="center">27.8073</td>
<td valign="top" align="center">7.6918</td>
<td valign="top" align="center">0.7502</td>
</tr>
<tr>
<td valign="top" align="center">w/o LFT</td>
<td valign="top" align="center">23.3375</td>
<td valign="top" align="center">0.8235</td>
<td valign="top" align="center">0.9365</td>
<td valign="top" align="center">4.6894</td>
<td valign="top" align="center">31.0282</td>
<td valign="top" align="center">7.7461</td>
<td valign="top" align="center">2.0217</td>
<td valign="top" align="center">0.7118</td>
<td valign="top" align="center">2.4796</td>
<td valign="top" align="center">23.5163</td>
<td valign="top" align="center">7.6088</td>
<td valign="top" align="center">1.0772</td>
<td valign="top" align="center">0.7911</td>
<td valign="top" align="center">2.6054</td>
<td valign="top" align="center">23.7649</td>
<td valign="top" align="center">7.6150</td>
<td valign="top" align="center">1.0226</td>
</tr>
<tr>
<td valign="top" align="center">w/o DFD</td>
<td valign="top" align="center">24.7891</td>
<td valign="top" align="center">0.8424</td>
<td valign="top" align="center">0.8724</td>
<td valign="top" align="center">5.0469</td>
<td valign="top" align="center">30.4158</td>
<td valign="top" align="center">7.7562</td>
<td valign="top" align="center">2.1010</td>
<td valign="top" align="center">0.6657</td>
<td valign="top" align="center">3.2697</td>
<td valign="top" align="center">23.7472</td>
<td valign="top" align="center">7.5861</td>
<td valign="top" align="center">0.9396</td>
<td valign="top" align="center">0.7451</td>
<td valign="top" align="center">3.2643</td>
<td valign="top" align="center">23.5248</td>
<td valign="top" align="center">7.5743</td>
<td valign="top" align="center">0.9901</td>
</tr>
<tr>
<td valign="top" align="center">LFT-DGAN</td>
<td valign="top" align="center">24.4591</td>
<td valign="top" align="center">0.8284</td>
<td valign="top" align="center">0.9618</td>
<td valign="top" align="center">5.2957</td>
<td valign="top" align="center">34.9749</td>
<td valign="top" align="center">7.8017</td>
<td valign="top" align="center">2.1803</td>
<td valign="top" align="center">0.8482</td>
<td valign="top" align="center">3.6488</td>
<td valign="top" align="center">29.6018</td>
<td valign="top" align="center">7.7981</td>
<td valign="top" align="center">1.1125</td>
<td valign="top" align="center">0.9226</td>
<td valign="top" align="center">3.6823</td>
<td valign="top" align="center">28.3939</td>
<td valign="top" align="center">7.7854</td>
<td valign="top" align="center">1.1627</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Red indicates the best result, and blue indicates the second best result.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s4_9">
<label>4.9</label>
<title>Generalization performance of our method</title>
<p>We validate the generalization of the LFT-DGAN on different tasks [motion blurring (<xref ref-type="bibr" rid="B28">Rim et&#xa0;al., 2020</xref>), brightness (<xref ref-type="bibr" rid="B1">Cai et&#xa0;al., 2018</xref>), defogging (<xref ref-type="bibr" rid="B22">Li et&#xa0;al., 2018a</xref>), and rain removal (<xref ref-type="bibr" rid="B41">Yasarla and Patel, 2020</xref>)]. As can be seen in <xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14</bold></xref>, the LFT-DGAN is able to remove the haze phenomenon better and improve the contrast of the image at different levels of haze images. In the dataset of images with different degrees of motion blur, the LFT-DGAN is only able to remove minor motion blur and does not work well for images with strong motion blur. In images with different degrees of illumination, our method is able to perfectly eliminate the effects of illumination. In images with different levels of rain, the LFT-DGAN is able to resolve the effect of different levels of raindrops on the image and can significantly enhance the image details.</p>
<fig id="f14" position="float">
<label>Figure&#xa0;14</label>
<caption>
<p>The generalization results of our proposed method (motion blurring, brightness, defogging, and rain removal). <bold>(A)</bold> Light haze, <bold>(B)</bold> heavy haze, <bold>(C)</bold> low motion blurring, <bold>(D)</bold> high motion blurring, <bold>(E)</bold> bright light, <bold>(F)</bold> weak light, <bold>(G)</bold> light rain, and <bold>(H)</bold> heavy rain.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1321549-g014.tif"/>
</fig>
</sec>
</sec>
<sec id="s5" sec-type="conclusion">
<label>5</label>
<title>Conclusion</title>
<p>In this paper, an underwater single-image enhancement method that can learn from an LFT-DGAN is proposed. Experimental results show that the advantages of the proposed method are summarized as follows: (1) A new image frequency domain decomposition method is designed using reversible convolutional networks, which can effectively separate low-, medium-, and high-frequency image information from underwater images. Note that the frequency domain decomposition method in this paper can be applied not only in the field of image enhancement, but also in other fields. (2) We have designed a custom transformer for frequency domain image feature enhancement that takes full account of underwater image space and channel correlation to effectively address underwater image color and contrast issues. (3) We design a dual discriminator method in the spatial and frequency domains, taking into account the differences between spatial and frequency domain underwater image features in order to reduce the differences between images. (4) The proposed model is able to operate directly at the pixel level without additional conditional priors. (5) The combined analysis shows that our proposed method performs superiorly on multiple datasets. Moreover, the ablation experiments also demonstrate the effectiveness of each module.</p>
<p>Although our method has good performance, it also has some limitations. The image enhancement method proposed in this paper is able to achieve pleasing results, but whether it is beneficial to the high-level domain deserves further investigation. In addition, current underwater imagery consists mainly of sonar imaging (<xref ref-type="bibr" rid="B45">Zhang et&#xa0;al., 2021b</xref>; <xref ref-type="bibr" rid="B42">Zhang, 2023</xref>) and optical camera imaging. Since sonar imaging uses acoustic signals while camera imaging uses optical signals, there are significant differences between the two methods. It is difficult for the method in this paper to perform mutual transfer learning. In future work, we intend to address the above issues.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>SJZ: Methodology, Writing &#x2013; original draft. RW: Supervision, Funding acquisition, Writing &#x2013; review &amp; editing. STZ: Conceptualization, Data curation, Writing &#x2013; review &amp; editing. LW: Formal Analysis, Visualization, Writing &#x2013; review &amp; editing. ZL: Supervision, Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by the National Key R&amp;D Program of China (2019YFE0125700) and the National Natural Science Foundation of China (Grant No. 31671586).</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Learning a deep single image contrast enhancer from multi-exposure images</article-title>. <source>IEEE Trans. Image Process.</source> <volume>27</volume>, <fpage>2049</fpage>&#x2013;<lpage>2062</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIP.2018.2794218</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Kalantidis</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Rohrbach</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). &#x201c;<article-title>Drop an octave: Reducing spatial redundancy in convolutional neural networks with octave convolution</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>. (<publisher-loc>New York, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>3435</fpage>&#x2013;<lpage>3444</lpage>.</citation>
</ref>
<ref id="B3">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Hinet: Half instance normalization network for image restoration</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>. (<publisher-loc>New York, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>182</fpage>&#x2013;<lpage>192</lpage>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dinh</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Krueger</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Bengio</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Nice: Non-linear independent components estimation</article-title>. <source>arXiv preprint arXiv:1410.8516</source>.</citation>
</ref>
<ref id="B5">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Drews</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Moraes</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Botelho</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Campos</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Transmission estimation in underwater single images</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE international conference on computer vision workshops</conf-name>. (<publisher-loc>New York, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>825</fpage>&#x2013;<lpage>830</lpage>.</citation>
</ref>
<ref id="B6">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Fabbri</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Islam</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Sattar</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Enhancing underwater imagery using generative adversarial networks</article-title>,&#x201d; in <conf-name>2018 IEEE International Conference on Robotics and Automation (ICRA)</conf-name>. (<publisher-loc>New York, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>7159</fpage>&#x2013;<lpage>7165</lpage>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>a). <article-title>Twice mixing: a rank learning based quality assessment approach for underwater image enhancement</article-title>. <source>Signal Processing: Image Communication</source> <volume>102</volume>, <fpage>116622</fpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Fu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>K.-K.</given-names>
</name>
</person-group> (<year>2022</year>b). &#x201c;<article-title>Uncertainty inspired underwater image enhancement</article-title>,&#x201d; in <conf-name>Computer Vision&#x2013;ECCV 2022: 17th European Conference</conf-name>. (<publisher-loc>Tel Aviv, Israel</publisher-loc>: <publisher-name>IEEE</publisher-name>), <conf-date>October 23&#x2013;27, 2022</conf-date>. <fpage>465</fpage>&#x2013;<lpage>482</lpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghani</surname> <given-names>A. S. A.</given-names>
</name>
<name>
<surname>Isa</surname> <given-names>N. A. M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Automatic system for improving underwater image contrast and color through recursive adaptive histogram modification</article-title>. <source>Comput. Electron. Agric.</source> <volume>141</volume>, <fpage>181</fpage>&#x2013;<lpage>195</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2017.07.021</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guan</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Jing</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Deepmih: Deep invertible network for multiple image hiding</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>45</volume>, <fpage>372</fpage>&#x2013;<lpage>390</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TPAMI.2022.3141725</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Han</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shoeiby</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Malthus</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Botha</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Anstee</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Anwar</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). &#x201c;<article-title>Single underwater image restoration by contrastive learning</article-title>,&#x201d; in <conf-name>2021 IEEE International Geoscience and Remote Sensing Symposium IGARSS</conf-name>.  (<publisher-loc>New York, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>2385</fpage>&#x2013;<lpage>2388</lpage>.</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hautiere</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Tarel</surname> <given-names>J.-P.</given-names>
</name>
<name>
<surname>Aubert</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Dumont</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Blind contrast enhancement assessment by gradient ratioing at visible edges</article-title>. <source>Image Anal. Stereology</source> <volume>27</volume>, <fpage>87</fpage>&#x2013;<lpage>95</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5566/ias.v27.p87-95</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Enhanced frequency fusion network with dynamic hash attention for image denoising</article-title>. <source>Inf. Fusion</source> <volume>92</volume>, <fpage>420</fpage>&#x2013;<lpage>434</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.inffus.2022.12.015</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Johnson</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Alahi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Fei-Fei</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Perceptual losses for real-time style transfer and super-resolution</article-title>,&#x201d; in <conf-name>Computer Vision&#x2013;ECCV 2016: 14th European Conference</conf-name>.  (<publisher-loc>Amsterdam, The Netherlands</publisher-loc>: <publisher-name>IEEE</publisher-name>), <conf-date>October 11&#x2013;14, 2016</conf-date>. <fpage>694</fpage>&#x2013;<lpage>711</lpage>.</citation>
</ref>
<ref id="B15">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Jung</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Keuper</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Spectral distribution aware image generation</article-title>,&#x201d; in <conf-name>Proceedings of the AAAI conference on artificial intelligence</conf-name> (<publisher-loc>British Columbia, Canada</publisher-loc>: <publisher-name>AAAI</publisher-name>), <volume>35</volume>. <fpage>1734</fpage>&#x2013;<lpage>1742</lpage>.</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A perception-aware decomposition and fusion framework for underwater image enhancement</article-title>. <source>IEEE Trans. Circuits Syst. Video Technol</source>.</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kingma</surname> <given-names>D. P.</given-names>
</name>
<name>
<surname>Dhariwal</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Glow: Generative flow with invertible 1x1 convolutions</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>31</volume>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Di</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Fa-gan: a feature attention gan with fusion discriminator for non-homogeneous dehazing</article-title>. <source>Signal Image Video Process.</source> <volume>16</volume>(<issue>5</issue>), <fpage>1243</fpage>&#x2013;<lpage>1251</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11760-021-02075-1</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2018</year>b). <article-title>Emerging from water: Underwater image color correction based on weakly supervised color transfer</article-title>. <source>IEEE Signal Process. Lett.</source> <volume>25</volume>, <fpage>323</fpage>&#x2013;<lpage>327</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LSP.2018.2792050</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Cong</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Hou</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Kwong</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>An underwater image enhancement benchmark dataset and beyond</article-title>. <source>IEEE Trans. Image Process.</source> <volume>29</volume>, <fpage>4376</fpage>&#x2013;<lpage>4389</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIP.2019.2955241</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Pang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Learning omni-frequency region-adaptive representations for real image super-resolution</article-title>. <source>Proc. AAAI Conf. Artif. Intell.</source> <volume>35</volume>, <fpage>1975</fpage>&#x2013;<lpage>1983</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1609/aaai.v35i3.16293</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Tao</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>W.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>a). <article-title>Benchmarking single-image dehazing and beyond</article-title>. <source>IEEE Trans. Image Process.</source> <volume>28</volume>, <fpage>492</fpage>&#x2013;<lpage>505</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIP.2018.2867951</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hou</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Real-world underwater enhancement: Challenges, benchmarks, and solutions under natural light</article-title>. <source>IEEE Trans. Circuits Syst. Video Technol.</source> <volume>30</volume>, <fpage>4861</fpage>&#x2013;<lpage>4875</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TCSVT.2019.2963772</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Anwar</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Caldwell</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). &#x201c;<article-title>Invertible denoising network: A light solution for real noise removal</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>New York, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>13365</fpage>&#x2013;<lpage>13374</lpage>.</citation>
</ref>
<ref id="B25">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ma</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Oh</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>A wavelet-based dual-stream network for underwater image enhancement</article-title>,&#x201d; in <conf-name>ICASSP 2022&#x2013;2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>. (<publisher-loc>New York, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>2769</fpage>&#x2013;<lpage>2773</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Panetta</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Agaian</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Human-visual-system-inspired underwater image quality measures</article-title>. <source>IEEE J. Oceanic Eng.</source> <volume>41</volume>, <fpage>541</fpage>&#x2013;<lpage>551</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JOE.2015.2469915</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Reinforced swin-convs transformer for simultaneous underwater sensing scene image enhancement and super-resolution</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2022.3205061</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Rim</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Won</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Cho</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Real-world blur dataset for learning and benchmarking deblurring algorithms</article-title>,&#x201d; in <conf-name>Computer Vision&#x2013;ECCV 2020: 16th European Conference</conf-name>, (<publisher-loc>Glasgow, UK</publisher-loc>: <publisher-name>IEEE</publisher-name>), <conf-date>August 23&#x2013;28, 2020</conf-date>. <fpage>184</fpage>&#x2013;<lpage>201</lpage>.</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saleh</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sheaves</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jerry</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Azghadi</surname> <given-names>M. R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Adaptive uncertainty distribution in deep learning for unsupervised underwater image enhancement</article-title>. <source>arXiv preprint arXiv:2212.08983</source>.</citation>
</ref>
<ref id="B30">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Song</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Tjondronegoro</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>A rapid scene depth estimation model based on underwater light attenuation prior for underwater image restoration</article-title>,&#x201d; in <conf-name>Advances in Multimedia Information Processing&#x2013;PCM 2018: 19th Pacific-Rim Conference on Multimedia</conf-name>. (<publisher-loc>Hefei, China</publisher-loc>: <publisher-name>Springer</publisher-name>), <conf-date>September 21&#x2013;22, 2018</conf-date>. <fpage>678</fpage>&#x2013;<lpage>688</lpage>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Gong</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>a). <article-title>Underwater attentional generative adversarial networks for image enhancement</article-title>. <source>IEEE Trans. Human-Machine Syst</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/THMS.2023.3261341</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Karimi</surname> <given-names>H. R.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>b). <article-title>Deep learning-based visual detection of marine organisms: A survey</article-title>. <source>Neurocomputing</source> <volume>532</volume>, <fpage>1</fpage>&#x2013;<lpage>32</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.neucom.2023.02.018</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Yue</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Uiec&#x2c6;2-net: Cnn-based underwater image enhancement using two color space</article-title>. <source>Signal Processing: Image Communication</source> <volume>96</volume>, <fpage>116250</fpage>.</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>An imaging-inspired no-reference underwater color image quality assessment metric</article-title>. <source>Comput. Electrical Eng.</source> <volume>70</volume>, <fpage>904</fpage>&#x2013;<lpage>913</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compeleceng.2017.12.006</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>a). &#x201c;<article-title>Semantic-aware texture-structure feature collaboration for underwater image enhancement</article-title>,&#x201d; in <conf-name>2022 International Conference on Robotics and Automation (ICRA)</conf-name>. (<publisher-loc>New York, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>4592</fpage>&#x2013;<lpage>4598</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICRA46639.2022.9812457</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>b). <article-title>Underwater image super-resolution and enhancement via progressive frequency-interleaved network</article-title>. <source>J. Visual Communication Image Representation</source> <volume>86</volume>, <fpage>103545</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jvcir.2022.103545</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>A two-stage underwater enhancement network based on structure decomposition and characteristics of underwater imaging</article-title>. <source>IEEE J. Oceanic Eng.</source> <volume>46</volume>, <fpage>1213</fpage>&#x2013;<lpage>1227</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JOE.2021.3064093</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Rahardja</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Usln: A statistically guided lightweight network for underwater image enhancement via dual-statistic white balance and multi-color space stretch</article-title>. <source>arXiv preprint arXiv:2209.02221</source>.</citation>
</ref>
<ref id="B39">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>He</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Ke</surname> <given-names>G.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). &#x201c;<article-title>Invertible image rescaling</article-title>,&#x201d; in <conf-name>Computer Vision&#x2013;ECCV 2020: 16th European Conference</conf-name> (<publisher-loc>Glasgow, UK</publisher-loc>: <publisher-name>IEEE</publisher-name>), <conf-date>August 23&#x2013;28, 2020</conf-date>. <fpage>126</fpage>&#x2013;<lpage>144</lpage>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sowmya</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>An underwater color image quality evaluation metric</article-title>. <source>IEEE Trans. Image Process.</source> <volume>24</volume>, <fpage>6062</fpage>&#x2013;<lpage>6071</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIP.2015.2491020</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yasarla</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Patel</surname> <given-names>V. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Confidence measure guided single image de-raining</article-title>. <source>IEEE Trans. Image Process.</source> <volume>29</volume>, <fpage>4544</fpage>&#x2013;<lpage>4555</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIP.2020.2973802</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>An efficient method for the simulation of multireceiver sas raw signal</article-title>. <source>Multimedia Tools Appl.</source>, <fpage>1</fpage>&#x2013;<lpage>18</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11042-023-16992-5</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2022</year>a). <article-title>Retinex-inspired color correction and detail preserved fusion for underwater image enhancement</article-title>. <source>Comput. Electron. Agric.</source> <volume>192</volume>, <fpage>106585</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106585</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>a). <article-title>Enhancing underwater image via color correction and bi-interval contrast enhancement</article-title>. <source>Signal Processing: Image Communication</source> <volume>90</volume>, <fpage>116030</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.image.2020.116030</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ying</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>b). <article-title>Multireceiver sas imagery based on monostatic conversion</article-title>. <source>IEEE J. Selected Topics Appl. Earth Observations Remote Sens.</source> <volume>14</volume>, <fpage>10835</fpage>&#x2013;<lpage>10853</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2021.3121405</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zhuang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>H.-H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Kwong</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>b). <article-title>Underwater image enhancement via minimal color loss and locally adaptive contrast enhancement</article-title>. <source>IEEE Trans. Image Process.</source> <volume>31</volume>, <fpage>3997</fpage>&#x2013;<lpage>4010</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIP.2022.3177129</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A multi-scale feature modulation network for efficient underwater image enhancement</article-title>. <source>J. King Saud University-Computer Inf. Sci.</source> <volume>36</volume> (<issue>1</issue>), <fpage>101888</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jksuci.2023.101888</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhuang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Porikli</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Underwater image enhancement with hyper-laplacian reflectance priors</article-title>. <source>IEEE Trans. Image Process.</source> <volume>31</volume>, <fpage>5442</fpage>&#x2013;<lpage>5455</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIP.2022.3196546</pub-id>
</citation>
</ref>
</ref-list>
<app-group>
<title>Appendix</title>
<app id="app1_1">
<title>Power spectral density image</title>
<p>To analyze the influence of the image frequency domain distribution, we rely on the Fourier power spectrum with a one-dimensional representation of the features. First, we calculate the spectral representation from the DFT of a two-dimensional image of size M*N. The specific formula is (<xref ref-type="disp-formula" rid="eqA1">Equation A1</xref>):</p>
<disp-formula id="eqA1">
<label>(A1)</label>
<mml:math display="block" id="M19">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munderover>
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munderover>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#xb7;</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:mfrac>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:mfrac>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In this equation, <italic>P</italic>(<italic>u</italic>,<italic>v</italic>) represents the power spectrum of the image, while <italic>f</italic>(<italic>m</italic>,<italic>n</italic>) represents the pixel values of the original image in the spatial domain. The symbol &#x3a3; denotes summation, indicating that we sum over all values of <italic>m</italic> and <italic>n</italic>. The symbol <italic>i</italic> represents the imaginary unit, while <italic>u</italic> and <italic>v</italic> respectively denote the coordinates in the frequency domain, and <italic>m</italic> and <italic>n</italic> represent the coordinates in the spatial domain. <italic>M</italic> and <italic>N</italic> represent the dimensions of the image. Secondly, by integrating over the azimuth angle <italic>&#x3b8;</italic> at each radial frequency, the one-dimensional power spectral density of the image at radial frequency <italic>r</italic> can be obtained. The specific formula is (<xref ref-type="disp-formula" rid="eqA2">Equation A2</xref>):</p>
<disp-formula id="eqA2">
<label>(A2)</label>
<mml:math display="block" id="M20">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mo>|</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</app>
<app id="app1_2">
<title>Param, flops, memory, and running time comparisons</title>
<p>In order to fully assess the efficiency of our proposed method, we compare the number of parameters (#Param), the number of floating-point operations (#Flops), GPU memory consumption (#GPU Mem.), and running time (#Avg.Time). Among them, #Flops is computed for an underwater image whose input size is 256*256, and #Avg.Time is the average time for testing 100 underwater images with a size of 620*460. The results are shown in <xref ref-type="table" rid="TA1"><bold>Table A1</bold></xref>. Our proposed method maintains a comparatively small footprint in terms of parameter count and floating-point operations while still achieving the best image enhancement results; it also performs well in terms of memory footprint and average test running time.</p>
<table-wrap id="TA1" position="float">
<label>Table A1</label>
<caption>
<p>Comparison of parameter counts, floating-point operations, memory, and runtime.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Methods</th>
<th valign="top" align="center">#Param [M]</th>
<th valign="top" align="center">#Flops [G]</th>
<th valign="top" align="center">#GPU Mem. [M]</th>
<th valign="top" align="center">#Avg.Time [ms]</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">CWR</td>
<td valign="top" align="center">11.4</td>
<td valign="top" align="center">642.5781</td>
<td valign="top" align="center">18,475</td>
<td valign="top" align="center">20.55</td>
</tr>
<tr>
<td valign="top" align="center">PUIE</td>
<td valign="top" align="center">1.4</td>
<td valign="top" align="center">234.4316</td>
<td valign="top" align="center">295</td>
<td valign="top" align="center">1.3</td>
</tr>
<tr>
<td valign="top" align="center">USLN</td>
<td valign="top" align="center">0.00085</td>
<td valign="top" align="center">0.5708</td>
<td valign="top" align="center">313.89</td>
<td valign="top" align="center">13.4</td>
</tr>
<tr>
<td valign="top" align="center">STSC</td>
<td valign="top" align="center">69.3</td>
<td valign="top" align="center">242.63</td>
<td valign="top" align="center">356.21</td>
<td valign="top" align="center">18.6</td>
</tr>
<tr>
<td valign="top" align="center">UDnet</td>
<td valign="top" align="center">16.1</td>
<td valign="top" align="center">286.12</td>
<td valign="top" align="center">596.92</td>
<td valign="top" align="center">22.7</td>
</tr>
<tr>
<td valign="top" align="center">URSCT</td>
<td valign="top" align="center">11.4</td>
<td valign="top" align="center">14.95</td>
<td valign="top" align="center">435.62</td>
<td valign="top" align="center">25.62</td>
</tr>
<tr>
<td valign="top" align="center">LFT-DGAN</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">132.14</td>
<td valign="top" align="center">234.52</td>
<td valign="top" align="center">15.23</td>
</tr>
</tbody>
</table>
</table-wrap>
</app>
</app-group>
</back>
</article>
