<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Energy Res.</journal-id>
<journal-title>Frontiers in Energy Research</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Energy Res.</abbrev-journal-title>
<issn pub-type="epub">2296-598X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1341103</article-id>
<article-id pub-id-type="doi">10.3389/fenrg.2024.1341103</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Energy Research</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Water level control of nuclear steam generators using intelligent hierarchical autonomous controller</article-title>
<alt-title alt-title-type="left-running-head">Peng et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fenrg.2024.1341103">10.3389/fenrg.2024.1341103</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Peng</surname>
<given-names>Binsen</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2425320/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ma</surname>
<given-names>Xintong</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xia</surname>
<given-names>Hong</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>China North Artificial Intelligence and Innovation Research Institute</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Collective Intelligence and Collaboration Laboratory</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Key Laboratory of Nuclear Safety and Advanced Nuclear Energy Technology</institution>, <institution>Ministry of Industry and Information Technology</institution>, <addr-line>Harbin</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Fundamental Science on Nuclear Safety and Simulation Technology Laboratory</institution>, <institution>Harbin Engineering University</institution>, <addr-line>Harbin</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>China Nuclear Power Engineering Co, Ltd</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1555736/overview">Vivek Agarwal</ext-link>, Idaho National Laboratory (DOE), United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1869920/overview">Linyu Lin</ext-link>, Idaho National Laboratory (DOE), United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/78949/overview">Xiaojing Liu</ext-link>, Shanghai Jiao Tong University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Binsen Peng, <email>binsenpeng@hrbeu.edu.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>05</day>
<month>02</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1341103</elocation-id>
<history>
<date date-type="received">
<day>19</day>
<month>11</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>12</day>
<month>01</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Peng, Ma and Xia.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Peng, Ma and Xia</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The challenge of water level control in steam generators, particularly at low power levels, has always been a critical aspect of nuclear power plant operation. To address this issue, this paper introduces an intelligent hierarchical autonomous (IHA) controller. This controller employs a cascaded PI (CPI) controller as the primary controller for direct water level control, coupled with an agent-based controller optimized through a deep reinforcement learning (DRL) algorithm. The agent dynamically optimizes the parameters of the CPI controller in real time based on the system&#x2019;s state, resulting in improved control performance. Firstly, new observation information is obtained to capture the accurate state of the system, and a new reward function is constructed to evaluate the status of the system and guide the agent&#x2019;s learning process. Secondly, a deep residual network (ResNet) with good generalization performance is used as the approximator of the action-value function and the policy function. Then, the deep deterministic policy gradient (DDPG) algorithm is used to train the agent-based controller, and an advanced controller with good performance is obtained after training. Finally, the popular U-tube steam generator (UTSG) model is used to verify the effectiveness of the algorithm. The results demonstrate that the proposed method achieves rise times of 73.9&#xa0;s, 13.6&#xa0;s, and 16.4&#xa0;s at low, medium, and high power levels, respectively. Particularly, at low power levels, the IHA controller can restore the water level to its normal state within 200&#xa0;s. These results surpass those of the comparative methods, indicating that the proposed method excels not only in water level tracking but also in anti-interference capability. In essence, the IHA controller can autonomously learn the control strategy and reduce its reliance on the expert system, achieving true autonomous control and delivering excellent control performance.</p>
</abstract>
<kwd-group>
<kwd>U-tube steam generator</kwd>
<kwd>deep reinforcement learning</kwd>
<kwd>deep deterministic policy gradient</kwd>
<kwd>cascaded PI controller</kwd>
<kwd>water level control</kwd>
</kwd-group>
<contract-num rid="cn001">E2017023</contract-num>
<contract-sponsor id="cn001">Natural Science Foundation of Heilongjiang Province<named-content content-type="fundref-id">10.13039/501100005046</named-content>
</contract-sponsor>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Nuclear Energy</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>A typical natural circulation steam generator takes the form of a vertical U-tube steam generator (UTSG), as depicted in <xref ref-type="fig" rid="F1">Figure 1</xref>. This configuration serves as a critical component within the primary coolant system of a nuclear reactor. Its primary purpose is to function as a heat exchanger, facilitating the transfer of heat extracted from the reactor&#x2019;s primary coolant to a secondary fluid via a bundle of heat transfer tubes (<xref ref-type="bibr" rid="B24">Sui et al., 2020</xref>). This heat exchange process generates saturated steam, which is subsequently conveyed to a steam turbine for electricity generation. Moreover, the steam generator assumes a pivotal role in linking the primary and secondary coolant loops and acts as a safety barrier to prevent the release of radioactive materials. To ensure the safe operation of the UTSG, it is imperative to maintain the water level within a defined range. If the water level becomes excessively low, it can lead to damage to the heat transfer tubes. Conversely, an excessively high water level can impact the steam-water separation process, resulting in a decline in steam quality and potential damage to the steam turbine (<xref ref-type="bibr" rid="B13">Kong et al., 2022</xref>). Therefore, any abnormal water level condition in the UTSG necessitates a shutdown, which can have adverse consequences on the economic and safety aspects of pressurized water reactors (PWRs).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Steam generator structure diagram.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g001.tif"/>
</fig>
<p>The UTSG exhibits &#x201c;shrink and swell&#x201d; effects during operation, making it a complex nonlinear, non-minimum-phase system with a small stability margin, which brings many difficulties to the controller design. In order to solve the UTSG water level control problem, researchers have done a lot of valuable work in this area. Wan et al. (<xref ref-type="bibr" rid="B29">Wan et al., 2017</xref>), Rao et al. (<xref ref-type="bibr" rid="B19">Rao et al., 2024</xref>), Safarzadeh et al. (<xref ref-type="bibr" rid="B21">Safarzadeh et al., 2011</xref>) and Irving et al. (<xref ref-type="bibr" rid="B10">Irving et al., 1980</xref>) respectively proposed UTSG mathematical models that can accurately reflect the characteristics of the water level. Among them, the model proposed by Irving covers a variety of power level conditions, so it is widely used in control algorithm research, and this model was also used for our control algorithm research. The CPI controller can mitigate the influence of &#x201c;shrink and swell&#x201d; effects to some extent. On the one hand, the actual measured water level signal is passed through a first-order inertia stage, so that transient signals during water level expansion are delayed. This delay allows the deviation signal between steam flow and feedwater flow to increase the feedwater amount, thus achieving the correct action. On the other hand, the controller takes advantage of the fact that the flow error output by the water level control unit and the steam flow change in opposite directions. This characteristic is employed to eliminate the impact of &#x201c;shrink and swell&#x201d; effects. Consequently, the CPI controller remains widely employed in the water level control system of the UTSG.</p>
<p>In order to achieve robust stability and optimal dynamic performance, it is necessary to tune the parameters of the PID controller. Online self-tuning methods for PID control parameters possess the capabilities of self-learning, adaptability, and self-organization. They can dynamically adjust the PID model parameters online, adapting to the continuous changes in the object model parameters. So far, researchers have conducted a substantial amount of intriguing studies in this area. The Expert PID control method combines control experience patterns from an expert knowledge base, deriving the parameters of the PID controller through logical reasoning mechanisms. However, it heavily relies on the expert&#x2019;s experience, and the proficiency of the expert determines the effectiveness of the controller (<xref ref-type="bibr" rid="B9">Hu and Liu, 2020</xref>; <xref ref-type="bibr" rid="B31">Xu and Li, 2020</xref>). The Fuzzy PID control method condenses empirical knowledge into a fuzzy rule model, achieving self-tuning of PID parameters through fuzzy reasoning. It similarly depends on human experience, with the configuration of membership functions for process variables having a significant impact on the system (<xref ref-type="bibr" rid="B14">Li et al., 2017</xref>; <xref ref-type="bibr" rid="B17">Maghfiroh et al., 2022</xref>; <xref ref-type="bibr" rid="B34">Zhu et al., 2022</xref>). The Neural Network PID control method utilizes the nonlinear approximation capability of neural networks, dynamically adjusting PID parameters based on the system&#x2019;s input and output data to optimize control performance. However, it faces challenges such as acquiring training data and susceptibility to local optima (<xref ref-type="bibr" rid="B20">Rodriguez-Abreo et al., 2021</xref>; <xref ref-type="bibr" rid="B32">Zhang et al., 2022</xref>). 
The Genetic PID control method simulates the process of natural selection and genetic mechanisms to optimize controller parameters for improved control performance. It does not require complete information about the controlled object, but it has drawbacks like high computational demands and slow convergence speed (<xref ref-type="bibr" rid="B33">Zhou et al., 2019</xref>; <xref ref-type="bibr" rid="B1">Ahmmed et al., 2020</xref>).</p>
<p>To overcome the limitations of the aforementioned optimization algorithms, we explore the application of DRL algorithm, specifically DDPG, in the water level control of the UTSG. DDPG empowers agents with the capability for self-supervised learning, enabling them to interact autonomously with the environment, make continuous progress through trial and error, and collect training samples stored in an experience replay buffer. This helps reduce the correlation among samples and enhances training stability, all while decreasing the reliance on expert knowledge (<xref ref-type="bibr" rid="B30">Wang and Hong, 2020</xref>). DDPG employs an Actor-Critic structure, where the Actor network is responsible for policy generation, and the Critic network estimates state values or state-action values. These two networks collaborate in learning to improve performance. DDPG offers higher sample efficiency, implying that it can learn good policies in relatively few training steps without requiring extensive computational resources.</p>
<p>To achieve real-time optimization of PI controller parameters and reduce the difficulty of controller design, an IHA controller is proposed. The proposed controller uses the CPI controller as a primary controller and introduces DRL to build an advanced agent-based controller with autonomous control capabilities, which can continuously improve the CPI control strategy according to the state of the environment. The main contributions and innovation of this paper are as follows:<list list-type="simple">
<list-item>
<p>(1) A new reward function is proposed to improve the training effect of the model.</p>
</list-item>
<list-item>
<p>(2) The DDPG algorithm is used to optimize the agent-based controller, which can learn the control strategy independently.</p>
</list-item>
<list-item>
<p>(3) The deep ResNet is used as approximators of action-value and action functions to obtain better generalization performance.</p>
</list-item>
<list-item>
<p>(4) The UTSG water level model is used to verify the effectiveness of the proposed method.</p>
</list-item>
</list>
</p>
<p>The remainder of this paper is organized as follows. In <xref ref-type="sec" rid="s2">Section 2</xref> we present methods. We then present in detail the UTSG model and controller structure in <xref ref-type="sec" rid="s3">Section 3</xref>. The experimental test case results and discussions are provided in <xref ref-type="sec" rid="s4">Section 4</xref>. Finally, <xref ref-type="sec" rid="s5">Section 5</xref> concludes the paper.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>2 Methods</title>
<sec id="s2-1">
<title>2.1 Reinforcement learning</title>
<p>RL is an important branch of machine learning (<xref ref-type="bibr" rid="B6">Carapu&#xe7;o et al., 2018</xref>), but unlike supervised learning and unsupervised learning, it is an active learning process, which does not require specific training data, and agents need to obtain samples in the process of continuous interaction with the environment. As shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, by taking the goal of maximizing the cumulative reward, RL continuously optimizes the strategy based on the state, action, reward and other information, and finally finds the optimal state-action sequence during the training process. The process is very similar to that of human learning, in which strategies are continually improved through interaction and trial and error with the environment.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>The working process of RL.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g002.tif"/>
</fig>
<p>The interactive process can be expressed by Markov decision processes (<xref ref-type="bibr" rid="B5">Bi et al., 2019</xref>). Suppose the environment is completely observable, the state space of the environment is represented by <bold>
<italic>S</italic>
</bold>, and the action space is represented by <bold>
<italic>A</italic>
</bold>; the behavior of the agent is defined by policy &#x3c0;, which defines a probability distribution <italic>p</italic>(<bold>
<italic>A</italic>
</bold>) to represent the relationship between state and action. At time <italic>t</italic>, let <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="bold-italic">S</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the state of the environment, and <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (<inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) be the action taken by the agent according to the state <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and then at time <italic>t</italic>&#x2b;1, the state transitions to <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. In this process, the instant reward received by the agent can be expressed as <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. For the entire process <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>3</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the historical state information can be represented by state-action pairs <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The sum of discounted future rewards <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> received by the agent after performing the action <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is defined as<disp-formula id="e1">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b3;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf11">
<mml:math id="m12">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the discount factor, and <inline-formula id="inf12">
<mml:math id="m13">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The return <inline-formula id="inf13">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> depends strongly on the actions selected by the policy, and the goal of RL is to learn the optimal policy that maximizes the expected return from the start distribution <inline-formula id="inf14">
<mml:math id="m15">
<mml:mrow>
<mml:mi>J</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. We denote the discounted state visitation distribution of policy &#x3c0; as <inline-formula id="inf15">
<mml:math id="m16">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>In order to describe the expected return of the model under the state <inline-formula id="inf16">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the action <inline-formula id="inf17">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> taken by the agent, and under the guidance of the policy &#x3c0;, an action-value function <inline-formula id="inf18">
<mml:math id="m19">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is used to express it (<xref ref-type="bibr" rid="B25">Sutton et al., 2000</xref>), which is defined as<disp-formula id="e2">
<mml:math id="m20">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>The above formula can be converted into a recursive form through the Bellman equation as:<disp-formula id="e3">
<mml:math id="m21">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:msub>
<mml:mi mathvariant="double-struck">E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
</sec>
<sec id="s2-2">
<title>2.2 Deep deterministic policy gradient</title>
<p>DDPG is a model-free DRL method based on the actor-critic framework and the deterministic policy gradient algorithm (<xref ref-type="bibr" rid="B15">Lillic et al., 2016</xref>; <xref ref-type="bibr" rid="B27">Thomas and Brunskill, 2017</xref>). To handle high-dimensional state and action spaces, DDPG uses deep neural networks (<xref ref-type="bibr" rid="B23">Sen Peng et al., 2018</xref>) as approximators of the action function and the action-value function, which also introduces a problem. Training a neural network assumes that the samples are independently and identically distributed, but samples obtained in chronological order obviously do not meet this requirement. To solve this problem, DDPG draws on the experience replay mechanism of the deep Q-network (<xref ref-type="bibr" rid="B18">Mnih et al., 2015</xref>) and the minibatch training method of deep neural networks to ensure the stability of the training process of large-scale nonlinear networks.</p>
<p>When the policy is deterministic, it can be described by a function <inline-formula id="inf19">
<mml:math id="m22">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, which avoids the inner expectation over the next action, so the action-value function becomes:<disp-formula id="e4">
<mml:math id="m23">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<sec id="s2-2-1">
<title>2.2.1 Experience replay mechanism</title>
<p>In continuous control tasks, samples are usually collected in chronological order, and the data are highly correlated, so the variance between samples is small, which is obviously not conducive to the training of agents. Experience replay is used to solve this problem (<xref ref-type="bibr" rid="B18">Mnih et al., 2015</xref>), and a fixed-size replay buffer is created to cache the collected data. The data collected during each task execution process will be stored in the replay buffer in tuple <inline-formula id="inf20">
<mml:math id="m24">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>. During each training step, a minibatch of samples is randomly selected from the replay buffer, which can reduce the correlation between the data and improve training efficiency.</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Policy exploration</title>
<p>Policy exploration is a very important part in RL, which is used to explore unknown policies. If the explored policies are superior to the current policies, they can play an evolutionary role for the policies. In order to solve the exploration problem in continuous control tasks, the exploration policy <inline-formula id="inf21">
<mml:math id="m25">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is constructed by adding noise to the policy <inline-formula id="inf22">
<mml:math id="m26">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>:<disp-formula id="e5">
<mml:math id="m27">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf23">
<mml:math id="m28">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the actor noise. Considering that the plant used in this paper is an inertial system, the Ornstein-Uhlenbeck process (<xref ref-type="bibr" rid="B28">Uhlenbeck and Ornstein, 1930</xref>) is used to generate temporally correlated noise sequences to improve the exploration efficiency of control tasks in the inertial system. The Ornstein-Uhlenbeck process is a stochastic process, and its discrete form is<disp-formula id="e6">
<mml:math id="m29">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>&#x3f5;</mml:mi>
<mml:msqrt>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:msqrt>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf24">
<mml:math id="m30">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf25">
<mml:math id="m31">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are the mean and variance of the noise model, <inline-formula id="inf26">
<mml:math id="m32">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a random number, <inline-formula id="inf27">
<mml:math id="m33">
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a constant, which determines the speed at which the noise model output approaches the mean, and <inline-formula id="inf28">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the sampling time.</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Function approximators</title>
<p>For the agent to learn a better control strategy, it is vital to select an appropriate function approximator. Because the deep neural network has strong adaptability and can approximate any function in a nonlinear form, it is also the most widely used function approximator. We use deep ResNet (<xref ref-type="bibr" rid="B8">He et al., 2016</xref>) as a function approximator, which is constructed with a residual structure, shown in <xref ref-type="fig" rid="F3">Figure 3A</xref>. The critic network (<xref ref-type="fig" rid="F3">Figure 3B</xref>) and actor network (<xref ref-type="fig" rid="F3">Figure 3C</xref>) are constructed for the value function and action function, respectively. The activation function of the hidden layer of the network approximator is the rectified linear unit (ReLU) function and the activation function of the output layer is the tanh function.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Network structure. <bold>(A)</bold> ResNet structure; <bold>(B)</bold> Critic network structure; <bold>(C)</bold> Actor network structure.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g003.tif"/>
</fig>
</sec>
<sec id="s2-2-4">
<title>2.2.4 Training process</title>
<p>In this paper, the critic network, actor network, target critic network and target actor network are defined as <inline-formula id="inf29">
<mml:math id="m35">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf30">
<mml:math id="m36">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf31">
<mml:math id="m37">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf32">
<mml:math id="m38">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> respectively, where <inline-formula id="inf33">
<mml:math id="m39">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf34">
<mml:math id="m40">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf35">
<mml:math id="m41">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf36">
<mml:math id="m42">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are the parameters of each approximator. The main network and the target network have the same network structure, which is defined in <xref ref-type="sec" rid="s2-2-3">Section 2.2.3</xref>.</p>
<p>The pseudocode of the DDPG algorithm is shown in <xref ref-type="table" rid="T1">Table 1</xref>. During the training process, the network needs to be updated at every timestep. To ensure a stable training process, the network is trained using a minibatch training method. Suppose that each time <italic>N</italic> samples are taken from the replay buffer to form the training set <inline-formula id="inf37">
<mml:math id="m43">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. During the training process, the critic network is optimized by minimizing the loss function:<disp-formula id="e7">
<mml:math id="m44">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where<disp-formula id="e8">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>DDPG algorithm.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Randomly initialize critic network <inline-formula id="inf38">
<mml:math id="m46">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and actor <inline-formula id="inf39">
<mml:math id="m47">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> with weight <inline-formula id="inf40">
<mml:math id="m48">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf41">
<mml:math id="m49">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Initialize target network <inline-formula id="inf42">
<mml:math id="m50">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf43">
<mml:math id="m51">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> with weight <inline-formula id="inf44">
<mml:math id="m52">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
<mml:mo>&#x2190;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf45">
<mml:math id="m53">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
<mml:mo>&#x2190;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Create and initialize replay buffer <italic>R</italic> with size <italic>L</italic>
</td>
</tr>
<tr>
<td align="left">For episode &#x3d; 1, M do</td>
</tr>
<tr>
<td align="left">&#x2003;Initialize the policy exploration model</td>
</tr>
<tr>
<td align="left">&#x2003;Initialize and store observation state <inline-formula id="inf46">
<mml:math id="m54">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">&#x2003;For t &#x3d; 1: T do</td>
</tr>
<tr>
<td align="left">&#x2003;Select action <inline-formula id="inf47">
<mml:math id="m55">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> with <xref ref-type="disp-formula" rid="e5">formula 5</xref>
</td>
</tr>
<tr>
<td align="left">&#x2003;&#x2003;Execute action <inline-formula id="inf48">
<mml:math id="m56">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and observe reward <inline-formula id="inf49">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and observe new state <inline-formula id="inf50">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">&#x2003;&#x2003;Store transition <inline-formula id="inf51">
<mml:math id="m59">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> in <italic>R</italic>
</td>
</tr>
<tr>
<td align="left">&#x2003;&#x2003;Calculate loss function and update critic network with <xref ref-type="disp-formula" rid="e7">formula 7</xref>
</td>
</tr>
<tr>
<td align="left">&#x2003;&#x2003;Calculate policy gradient with <xref ref-type="disp-formula" rid="e9">formula 9</xref>
</td>
</tr>
<tr>
<td align="left">&#x2003;&#x2003;Update actor network with <xref ref-type="disp-formula" rid="e10">formula 10</xref>
</td>
</tr>
<tr>
<td align="left">&#x2003;&#x2003;Update target network <inline-formula id="inf52">
<mml:math id="m60">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf53">
<mml:math id="m61">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:math>
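</inline-formula> with <xref ref-type="disp-formula" rid="e11a">formulas 11a</xref> and <xref ref-type="disp-formula" rid="e11b">11b</xref></td>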
</tr>
<tr>
<td align="left">&#x2003;End for</td>
</tr>
<tr>
<td align="left">End for</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The gradient of the expected return from the start distribution <inline-formula id="inf54">
<mml:math id="m62">
<mml:mrow>
<mml:mi>J</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> with respect to the actor parameters can be obtained by the following formula:<disp-formula id="e9">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:msub>
<mml:mi>J</mml:mi>
<mml:mo>&#x2248;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mstyle>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>a</mml:mi>
</mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>Then use gradient <inline-formula id="inf55">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:msub>
<mml:mi>J</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to update the actor network:<disp-formula id="e10">
<mml:math id="m65">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:msub>
<mml:mi>J</mml:mi>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>Finally, use the soft update method to update the parameters of the target network <inline-formula id="inf56">
<mml:math id="m66">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf57">
<mml:math id="m67">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">&#xb5;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>:<disp-formula id="e11a">
<mml:math id="m68">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b7;</mml:mi>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b7;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(11a)</label>
</disp-formula>
<disp-formula id="e11b">
<mml:math id="m69">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b7;</mml:mi>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b7;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(11b)</label>
</disp-formula>where <inline-formula id="inf58">
<mml:math id="m70">
<mml:mrow>
<mml:mi>&#x3b7;</mml:mi>
<mml:mo>&#x226a;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, which makes the update speed of the target network very slow, thereby greatly improving the robustness of the learning process.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<title>3 UTSG control model</title>
<sec id="s3-1">
<title>3.1 Mathematical model</title>
<p>An adept UTSG model is crucial for the design and testing of control algorithms. Typically, thermal-hydraulic models based on conservation principles of mass, energy, and momentum are employed to precisely simulate the operational characteristics of steam generators. However, such models often exhibit intricate non-linear features, posing challenges in controller design. In practice, a UTSG model that is relatively straightforward yet accurate, faithfully capturing dynamic traits, is preferred. The linear model proposed by Irving (<xref ref-type="bibr" rid="B10">Irving et al., 1980</xref>), derived through a fusion of experimental and theoretical approaches, has undergone rigorous validation across multiple power levels, affirming its precision in replicating operational characteristics. Consequently, it has found extensive application in the realm of control algorithm research. This model establishes a transfer function model related to feed water flow <inline-formula id="inf59">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, steam flow <inline-formula id="inf60">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and narrow-range water level <inline-formula id="inf61">
<mml:math id="m73">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>:<disp-formula id="e12">
<mml:math id="m74">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msubsup>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>4</mml:mn>
<mml:msup>
<mml:mi>&#x3c0;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:msup>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf62">
<mml:math id="m75">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the Laplace variable, <inline-formula id="inf63">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf64">
<mml:math id="m77">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf65">
<mml:math id="m78">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are constants, <inline-formula id="inf66">
<mml:math id="m79">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the delay time of the shrink and swell phenomenon, <inline-formula id="inf67">
<mml:math id="m80">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the delay time of the mechanical oscillation, and <inline-formula id="inf68">
<mml:math id="m81">
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the period of the mechanical oscillation. The first term <inline-formula id="inf69">
<mml:math id="m82">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> calculates the change in water level by summing the flow in and out, which represents the wide-ranging effect of UTSG. The second term <inline-formula id="inf70">
<mml:math id="m83">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is used to describe the inverse kinetic phenomena caused by shrink and swell effects. The third term <inline-formula id="inf71">
<mml:math id="m84">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msubsup>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mi>s</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>4</mml:mn>
<mml:msup>
<mml:mi>&#x3c0;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:msup>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the effect of water level oscillations generated by the feed water in the annular descending channel. The values of the power-related parameters of this model at 5 typical power levels are given in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>The UTSG model parameters at different power levels.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">
<italic>p</italic>/%</th>
<th align="center">
<inline-formula id="inf72">
<mml:math id="m85">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<inline-formula id="inf73">
<mml:math id="m86">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<inline-formula id="inf74">
<mml:math id="m87">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<inline-formula id="inf75">
<mml:math id="m88">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<inline-formula id="inf76">
<mml:math id="m89">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<inline-formula id="inf77">
<mml:math id="m90">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>kg</mml:mtext>
<mml:mo>/</mml:mo>
<mml:mi mathvariant="normal">s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">
<inline-formula id="inf78">
<mml:math id="m91">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">5</td>
<td align="center">0.058</td>
<td align="center">41.900</td>
<td align="center">9.630</td>
<td align="center">48.400</td>
<td align="center">0.181</td>
<td align="center">57.400</td>
<td align="center">119.600</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">0.058</td>
<td align="center">26.300</td>
<td align="center">4.460</td>
<td align="center">21.500</td>
<td align="center">0.226</td>
<td align="center">180.800</td>
<td align="center">60.500</td>
</tr>
<tr>
<td align="center">30</td>
<td align="center">0.058</td>
<td align="center">43.400</td>
<td align="center">1.830</td>
<td align="center">4.500</td>
<td align="center">0.310</td>
<td align="center">381.700</td>
<td align="center">17.700</td>
</tr>
<tr>
<td align="center">50</td>
<td align="center">0.058</td>
<td align="center">34.800</td>
<td align="center">1.050</td>
<td align="center">3.600</td>
<td align="center">0.215</td>
<td align="center">660.000</td>
<td align="center">14.200</td>
</tr>
<tr>
<td align="center">100</td>
<td align="center">0.058</td>
<td align="center">28.600</td>
<td align="center">0.470</td>
<td align="center">3.400</td>
<td align="center">0.105</td>
<td align="center">1,435.000</td>
<td align="center">11.700</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-2">
<title>3.2 Model dynamic characteristics analysis</title>
<p>To understand the dynamic characteristics of the model, this section briefly analyzes its response when the feed water flow and the steam flow each undergo a step change of &#x2b;1&#xa0;kg/s, with reference to the shrink and swell phenomenon.</p>
<p>When the feedwater flow rate experiences a step increase of &#x2b;1&#xa0;kg/s, the corresponding dynamic response of the UTSG water level is illustrated in <xref ref-type="fig" rid="F4">Figure 4A</xref>. It becomes evident that the initial surge in feedwater flow prompts a surge in water level, irrespective of the power levels. Subsequently, as the feedwater temperature falls below the saturation temperature, leading to an augmentation in subcooling within the bundle and consequent steam condensation, the water level descends. Given that the feedwater flow surpasses the steam flow, the water level sustains an upward trajectory, a phenomenon colloquially referred to as the &#x2018;shrink effect&#x2019;.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Water level response. <bold>(A)</bold> When the feed water flow steps 1&#x00a0;kg/s; <bold>(B)</bold> When the steam flow steps 1&#x00a0;kg/s.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g004.tif"/>
</fig>
<p>When a step increase of &#x2b;1&#xa0;kg/s in steam flow is applied, the associated dynamic response of the UTSG water level is depicted in <xref ref-type="fig" rid="F4">Figure 4B</xref>. It becomes evident that, across varying power levels, as steam flow escalates, the pressure within the steam dome diminishes, leading to a reduction in the saturation temperature of the water and an augmentation in boiling within the bundle area. Consequently, the water level initially experiences an ascent. As the steam flow surpasses that of the feedwater, a sustained decline in the water level ensues, a phenomenon commonly referred to as the &#x2018;swell effect&#x2019;.</p>
<p>Simultaneously, it is noteworthy that, for distinct power levels, identical disturbances in feedwater flow or steam flow yield varying degrees of both the shrink and swell effects. Additionally, it is observed that the transition time during low-power operations exceeds that observed during high-power conditions. This phenomenon underscores the inherent challenges in regulating water levels within the low-power range.</p>
</sec>
<sec id="s3-3">
<title>3.3 Cascaded PI controller</title>
<p>Compared with a single control loop, cascaded control is more controllable and safer, and has better robustness (<xref ref-type="bibr" rid="B11">Jia et al., 2020</xref>). Therefore, the CPI controller is adopted as the basic controller in this paper. The working process of the CPI controller is shown in <xref ref-type="fig" rid="F8">Figure 8</xref>. In the outer loop, the difference between the expected water level and the model output water level is used as the input of the controller. Its main function is to make the water level track changes in the expected value. In the inner loop, the sum of the output of the outer loop controller and the steam flow rate, minus the feed water flow, is used as the input of the controller, which mainly serves to suppress steam flow disturbances.</p>
<p>The working principle of PI controller is expressed as:<disp-formula id="e13">
<mml:math id="m92">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mi>t</mml:mi>
</mml:msubsup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf79">
<mml:math id="m93">
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the error, <inline-formula id="inf80">
<mml:math id="m94">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the proportional coefficient, and <inline-formula id="inf81">
<mml:math id="m95">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the integral coefficient; in this paper, the proportional and integral coefficients of the outer loop controller are defined as <inline-formula id="inf82">
<mml:math id="m96">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf83">
<mml:math id="m97">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, and the corresponding parameters of the inner loop controller are defined as <inline-formula id="inf84">
<mml:math id="m98">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf85">
<mml:math id="m99">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s3-4">
<title>3.4 Controller design</title>
<p>The IHA controller proposed in this paper uses a two-level controller structure, shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. The CPI controller is used as the primary controller, which is responsible for directly controlling the water level of the UTSG model; the advanced controller is an agent-based controller with intelligent characteristics, which is responsible for online adjustment of the parameters of the CPI controller. In the control process, the primary controller and the advanced controller work together to adjust the control policy in real time according to the state of the system and realize intelligent autonomous control.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>UTSG water level control process.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g005.tif"/>
</fig>
</sec>
<sec id="s3-5">
<title>3.5 Observer information</title>
<p>Accurate observer information should be provided to represent the dynamic characteristics of the controlled object. In control systems, the error and the derivative of the error are often used to indicate the state of the system, which is more suitable for single-target control. However, the UTSG needs to change between different water levels, and various system states would need to be considered using the above state expression. To address this, the relative error and the derivative of the relative error are used to represent the state of the system. In this way, control for different targets can be achieved by designing only one state representation, which greatly reduces the complexity. At the same time, this paper draws on the ideas of (<xref ref-type="bibr" rid="B18">Mnih et al., 2015</xref>). In continuous control tasks, the environmental states at consecutive times are correlated, so the observed variables over a period of time are adopted as the environmental state representation, which can more accurately reflect the state of the system.</p>
<p>In this paper, the values of relative error <inline-formula id="inf86">
<mml:math id="m100">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and its derivative <inline-formula id="inf87">
<mml:math id="m101">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> over 3 consecutive seconds are used as observer information to obtain the observation vector <inline-formula id="inf88">
<mml:math id="m102">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> of the error term and the observation vector <inline-formula id="inf89">
<mml:math id="m103">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> of the derivative term, which are defined as follows:<disp-formula id="e14a">
<mml:math id="m104">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(14a)</label>
</disp-formula>
<disp-formula id="e14b">
<mml:math id="m105">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(14b)</label>
</disp-formula>
<disp-formula id="e14c">
<mml:math id="m106">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(14c)</label>
</disp-formula>where <inline-formula id="inf90">
<mml:math id="m107">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the target value, <inline-formula id="inf91">
<mml:math id="m108">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf92">
<mml:math id="m109">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are normalization coefficients, which are used to transform the values to the interval [0,1] to promote the training efficiency of the neural network. At time <inline-formula id="inf93">
<mml:math id="m110">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf94">
<mml:math id="m111">
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. Finally, the observation vectors <inline-formula id="inf95">
<mml:math id="m112">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf96">
<mml:math id="m113">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> are combined to obtain a comprehensive observation matrix <inline-formula id="inf97">
<mml:math id="m114">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> with a dimension of 3 &#xd7; 2:<disp-formula id="e15">
<mml:math id="m115">
<mml:mrow>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>Therefore, the dimension of observer information is determined to be 3 &#xd7; 2, and the dimension of action information is determined to be 4 &#xd7; 1. The network structure of action network and critic network is further confirmed, as shown in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>The number of neurons in different layers.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Layer name</th>
<th align="center">Neuron dimension</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Observation input layer</td>
<td align="center">
<inline-formula id="inf98">
<mml:math id="m116">
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Action input layer</td>
<td align="center">
<inline-formula id="inf99">
<mml:math id="m117">
<mml:mrow>
<mml:mn>4</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Fully connected layer</td>
<td align="center">
<inline-formula id="inf100">
<mml:math id="m118">
<mml:mrow>
<mml:mn>50</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Rescale layer</td>
<td align="center">
<inline-formula id="inf101">
<mml:math id="m119">
<mml:mrow>
<mml:mn>4</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Critic output layer</td>
<td align="center">
<inline-formula id="inf102">
<mml:math id="m120">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="center">Action output layer</td>
<td align="center">
<inline-formula id="inf103">
<mml:math id="m121">
<mml:mrow>
<mml:mn>4</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-6">
<title>3.6 Reward function</title>
<p>The reward function can also be called the evaluation function. A good reward function not only speeds up the learning process, but also makes it easier to find the global optimal solution. The commonly used evaluation functions are ITAE, ITSE and integral of squared time weighted errors. However, these functions are suitable for evaluating the entire control process. In the RL process, the control effect of each step needs to be evaluated, and it has a strong guiding effect on the learning process. Therefore, a new evaluation function is needed to evaluate the learning process.</p>
<p>In fact, in the control process, when the water level error is large, the PI controller needs a large gain to obtain a large response speed, and when the error is small, the value of the gain needs to be reduced to avoid overshoot. Therefore, this paper constructs a segmented evaluation function <inline-formula id="inf104">
<mml:math id="m122">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, which can guide the learning process by adjusting some parameters. Experiments show that this function can quickly and effectively guide the agent&#x2019;s learning, which will be introduced below.</p>
<p>According to the difference in absolute value of relative error <inline-formula id="inf105">
<mml:math id="m123">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, we specify that <inline-formula id="inf106">
<mml:math id="m124">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>200</mml:mn>
</mml:mrow>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the abnormal area, <inline-formula id="inf107">
<mml:math id="m125">
<mml:mrow>
<mml:mrow>
<mml:mn>200</mml:mn>
<mml:mo>%</mml:mo>
<mml:mo>&#x2265;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>15</mml:mn>
</mml:mrow>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the large error area, and <inline-formula id="inf108">
<mml:math id="m126">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>15</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the low error area.</p>
<p>Within the abnormal region, where the system strays significantly from the target value, a proactive approach is adopted. The ongoing task is promptly terminated, and a fresh training process is initiated to conserve valuable training time. Simultaneously, a correspondingly modest reward value is prescribed to guide the agent away from this undesirable state. In contrast, the expansive error zone warrants a heightened emphasis on speed of response, with the overarching goal being swifter rectification without excessive deliberation. Consequently, when the relative error resides within this territory, the reward value is uniformly designated as &#x2212;2. Within the realm of low error, where the system&#x2019;s output closely approximates the desired value but is susceptible to overshooting and necessitates prolonged adjustment, the formulation of the reward function assumes paramount significance.</p>
<p>Considering the relative error <inline-formula id="inf109">
<mml:math id="m127">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and its time derivative <inline-formula id="inf110">
<mml:math id="m128">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> can accurately represent the state of the system; therefore, we define the reward function in terms of them. At the same time, the power function is introduced to further optimize the reward function. As shown in <xref ref-type="fig" rid="F6">Figure 6</xref>, we plotted the power function <inline-formula id="inf111">
<mml:math id="m129">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mi mathvariant="normal">&#x3b1;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> in the interval <inline-formula id="inf112">
<mml:math id="m130">
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.15</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, from which we can see that when <inline-formula id="inf113">
<mml:math id="m131">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf114">
<mml:math id="m132">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is not sensitive to the change of <inline-formula id="inf115">
<mml:math id="m133">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. When <inline-formula id="inf116">
<mml:math id="m134">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <italic>y</italic> is more sensitive to the change of <inline-formula id="inf117">
<mml:math id="m135">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and the smaller <inline-formula id="inf118">
<mml:math id="m136">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is, the more sensitive it is, so it can play a magnifying effect on the local small features. Below we introduce in detail the low error area reward function.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>The curve of <inline-formula id="inf119">
<mml:math id="m137">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mi mathvariant="normal">&#x3b1;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> under different <inline-formula id="inf120">
<mml:math id="m138">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g006.tif"/>
</fig>
<p>The evaluation term <inline-formula id="inf121">
<mml:math id="m139">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> of relative error, defined in <xref ref-type="disp-formula" rid="e16">formula 16</xref>, is used to evaluate the degree of deviation of the system state from the expected value. When the steady-state error is 0, <inline-formula id="inf122">
<mml:math id="m140">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> takes the maximum value of 0.<disp-formula id="e16">
<mml:math id="m141">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>where <inline-formula id="inf123">
<mml:math id="m142">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the exponential adjustment factor, which can adjust the local feature. When <inline-formula id="inf124">
<mml:math id="m143">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, it means to reduce the local micro feature; when <inline-formula id="inf125">
<mml:math id="m144">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, it means to enlarge the local micro feature.</p>
<p>The evaluation item <inline-formula id="inf126">
<mml:math id="m145">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> of the derivative of the relative error, defined in <xref ref-type="disp-formula" rid="e17">formula 17</xref>, is used to evaluate the degree of fluctuation of the system state. When the system state is stable, <inline-formula id="inf127">
<mml:math id="m146">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> takes the maximum value of 0.<disp-formula id="e17">
<mml:math id="m147">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2202;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>where <inline-formula id="inf128">
<mml:math id="m148">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the exponential adjustment factor, whose effect is consistent with that of <inline-formula id="inf129">
<mml:math id="m149">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>During the control process, and especially when the reference value undergoes a sudden step, the derivative of the error can become very large. To limit the influence of such abrupt changes, we built a <inline-formula id="inf130">
<mml:math id="m150">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> function, as shown in <xref ref-type="disp-formula" rid="e18">formula 18</xref>, which can limit the value to a certain range.<disp-formula id="e18">
<mml:math id="m151">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>The <inline-formula id="inf131">
<mml:math id="m152">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> function is used to process <inline-formula id="inf132">
<mml:math id="m153">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and the following results are obtained:<disp-formula id="e19">
<mml:math id="m154">
<mml:mrow>
<mml:msubsup>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>
</p>
<p>By adding <inline-formula id="inf133">
<mml:math id="m155">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf134">
<mml:math id="m156">
<mml:mrow>
<mml:msubsup>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the reward function of the low error area is obtained:<disp-formula id="e20">
<mml:math id="m157">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>
</p>
<p>In summary, the final reward function is obtained:<disp-formula id="e21">
<mml:math id="m158">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>,</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>200</mml:mn>
</mml:mrow>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>15</mml:mn>
<mml:mo>%</mml:mo>
<mml:mo>&#x3c;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>200</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>15</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>
</p>
<p>In order to reduce the complexity, this paper defines <inline-formula id="inf135">
<mml:math id="m159">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. To determine the values of <inline-formula id="inf136">
<mml:math id="m160">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf137">
<mml:math id="m161">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, a water level tracking simulation experiment was carried out. The power level of the model used is 5%. At 10&#xa0;s, the water level reference value is adjusted from 0&#xa0;mm to 100&#xa0;mm, the simulation time is set to 600&#xa0;s, and the number of training episodes is set to 1,200. At the end of the training, we tested the best performance of the agent obtained under different parameters, as shown in <xref ref-type="table" rid="T4">Table 4</xref>, from which we can see that when <inline-formula id="inf138">
<mml:math id="m162">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.8</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, the shortest settling time is obtained, indicating that a good control effect is achieved with this value.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Test results under different values of <inline-formula id="inf139">
<mml:math id="m163">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf140">
<mml:math id="m164">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">
<inline-formula id="inf141">
<mml:math id="m165">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Settling time (s)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<inline-formula id="inf142">
<mml:math id="m166">
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">497</td>
</tr>
<tr>
<td align="center">1.5</td>
<td align="center">461</td>
</tr>
<tr>
<td align="center">1</td>
<td align="center">435</td>
</tr>
<tr>
<td align="center">0.9</td>
<td align="center">398</td>
</tr>
<tr>
<td align="center">0.8</td>
<td align="center">375</td>
</tr>
<tr>
<td align="center">0.7</td>
<td align="center">392</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="results|discussion" id="s4">
<title>4 Results and discussion</title>
<sec id="s4-1">
<title>4.1 Training results</title>
<p>In this paper, the water level adjustment performance is trained to obtain the best control performance. The detailed training content is consistent with <xref ref-type="sec" rid="s3-5">Section 3.5</xref>. The main parameter settings of the program are given in <xref ref-type="table" rid="T5">Table 5</xref>; they were determined from the suggestions given in (<xref ref-type="bibr" rid="B18">Mnih et al., 2015</xref>) and several experimental tests.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Parameter settings.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Parameters</th>
<th align="center">Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<italic>L</italic>
</td>
<td align="center">1,000</td>
</tr>
<tr>
<td align="center">
<italic>N</italic>
</td>
<td align="center">64</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf143">
<mml:math id="m167">
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">600</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf144">
<mml:math id="m168">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">3</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf145">
<mml:math id="m169">
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">1,200(5% power level); 250(50% power level)</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf146">
<mml:math id="m170">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.993</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf147">
<mml:math id="m171">
<mml:mrow>
<mml:mi>&#x3b7;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">1e-4</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf148">
<mml:math id="m172">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.07</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf149">
<mml:math id="m173">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.15</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf150">
<mml:math id="m174">
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">1e-4</td>
</tr>
<tr>
<td align="center">
<italic>L</italic>
</td>
<td align="center">1e5</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Considering that similar results can be achieved across different power levels, we present training results for only the 5% power level and 50% power level. These training results are depicted in <xref ref-type="fig" rid="F7">Figure 7</xref>. From the figures, it becomes evident that in the initial stages of the training process, when the agent has not yet collected sufficient experience or undergone a sufficient number of training steps, the episode reward remains low, indicating an exploratory phase. As the number of episodes increases, the agent progressively discerns patterns, and the control performance improves. During this phase, the episode reward exhibits an upward trend. After a substantial number of training episodes, the agent starts to converge, with convergence values around &#x2212;220 for the 5% power level and around &#x2212;315 for the 50% power level. During this period, there is no distinct trend in episode rewards, signifying that the optimal control policy has been achieved.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Training results. <bold>(A)</bold> Training results at 5% power level; <bold>(B)</bold> Training results at 50% power level.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g007.tif"/>
</fig>
<p>Subsequently, an assessment of the trained controller&#x2019;s performance is scheduled, encompassing three distinctive tests: a water level tracking test, an anti-interference test, and a comparative analysis against findings within publicly available literature. Concurrently, two meticulously optimized controllers, distinguished by their commendable performance, serve as benchmarking mechanisms for each power level. The first of these controllers, christened &#x2018;FCPI,&#x2019; benefits from parameter optimization via a fuzzy logic algorithm, incorporating modules such as fuzzification, fuzzy rules, fuzzy inference, and defuzzification. The FCPI controller parameters can adapt with both power levels and water level errors. Due to space constraints, readers are encouraged to refer to the papers (<xref ref-type="bibr" rid="B16">Liu et al., 2010</xref>; <xref ref-type="bibr" rid="B4">Aulia et al., 2021</xref>) for details on the configuration strategy. The second controller, known as &#x2018;ACPI,&#x2019; attains its optimized parameters through the gain scheduling algorithm; the relationship between the parameters of the CPI controller and power <inline-formula id="inf151">
<mml:math id="m175">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is expressed by <xref ref-type="disp-formula" rid="e22">formula 22</xref>.<disp-formula id="e22">
<mml:math id="m176">
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.5991</mml:mn>
<mml:mo>&#x2a;</mml:mo>
<mml:msup>
<mml:mi>p</mml:mi>
<mml:mn>3</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>0.6281</mml:mn>
<mml:mo>&#x2a;</mml:mo>
<mml:msup>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>0.7725</mml:mn>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>p</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.0018</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>90</mml:mn>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0.3117</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>32.68</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>8.97</mml:mn>
<mml:mo>&#x2a;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>6</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2a;</mml:mo>
<mml:msub>
<mml:mi>log</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>4.876</mml:mn>
<mml:mo>&#x2a;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
<p>In order to gauge the efficacy of control, we use two evaluation indices: the integral of time-weighted squared error (ITSE) and the integral of time-weighted absolute error (ITAE). These indices offer a practical framework for assessing the performance of the control system. They effectively encapsulate the system&#x2019;s precision and responsiveness, with smaller values indicating superior performance.<disp-formula id="e23">
<mml:math id="m177">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mi>&#x221e;</mml:mi>
</mml:msubsup>
<mml:mi>t</mml:mi>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
<label>(23)</label>
</disp-formula>
<disp-formula id="e24">
<mml:math id="m178">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mi>&#x221e;</mml:mi>
</mml:msubsup>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(24)</label>
</disp-formula>where <inline-formula id="inf152">
<mml:math id="m179">
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the error.</p>
</sec>
<sec id="s4-2">
<title>4.2 Test 1 water level tracking test</title>
<p>This section mainly tests the system&#x2019;s output response to a step input, so as to show the dynamic performance of the system. The initial value of the reference water level is set to 0&#xa0;mm, and it then jumps to 100&#xa0;mm at 10&#xa0;s. The control effects of the three methods are compared at low power level (5%), medium power level (50%) and high power level (100%), respectively.</p>
<p>
<xref ref-type="fig" rid="F8">Figures 8</xref>, <xref ref-type="fig" rid="F9">9</xref>, <xref ref-type="fig" rid="F10">10</xref> show the comparison results of the three methods at different power levels. It can be seen from these figures that the three methods can track the change of water level and have good control effect. At the low power level, the proposed method achieves a rise time of 73.9&#xa0;s, which is 23.5% faster than the FCPI method and 59.4% faster than the ACPI method. At the medium power level, the proposed method achieves a rise time of 13.6&#xa0;s, which is 29.6% faster than the FCPI method and 56.5% faster than the ACPI method. At the high power level, the proposed method achieves a rise time of 16.4&#xa0;s, which is 10.4% faster than the FCPI method and 28.6% faster than the ACPI method. The above statements emphasize that the proposed method offers a faster response speed and superior control performance in terms of water level tracking. The proposed method, IHA, can iteratively engage with the steam generator model to acquire the water level control strategy. It employs deep neural networks to comprehend the intricate nonlinear relationship between system states and optimized actions. This adaptation allows the controller parameters to accommodate the dynamic variations of the system without the necessity of manual design for optimization strategies, as required in methods like FCPI and ACPI. Given the prolonged delay in false water level generation and the extended response time in low-power scenarios, more time is required to achieve control.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Water level tracking test results at 5% power level.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g008.tif"/>
</fig>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>The test results of water level tracking at 50% power level. <bold>(A)</bold> Kp1; <bold>(B)</bold> Ki1; <bold>(C)</bold> Kp2; <bold>(D)</bold> Ki2.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g009.tif"/>
</fig>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>The test results of water level tracking at 100% power level.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g010.tif"/>
</fig>
<p>
<xref ref-type="table" rid="T6">Table 6</xref> shows the comparison results of ITSE and ITAE of different methods, from which it can be seen that, at every power level, the values of ITSE and ITAE satisfy IHA &#x3c; FCPI &#x3c; ACPI. At the low power level, the proposed method exhibits an ITSE that is 10.7% lower than FCPI and 38.4% lower than ACPI. Additionally, the ITAE of the proposed method is 26.2% lower than FCPI and 83.3% lower than ACPI. At the medium power level, the proposed method achieves an ITSE that is 1.3% lower than FCPI and 7.1% lower than ACPI. Furthermore, the ITAE of the proposed method is 6.2% lower than FCPI and 23.7% lower than ACPI. At the high power level, the proposed method demonstrates an ITSE that is 3.2% lower than FCPI and 10.3% lower than ACPI. Likewise, the ITAE of the proposed method is 6.8% lower than FCPI and 20.7% lower than ACPI. The statements above highlight that the IHA method excels in terms of control accuracy and speed, particularly evident at low power levels. In summary, the IHA method exhibits the best control performance, followed by FCPI and ACPI, which aligns with the conclusions drawn from the figures.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>The ITSE and ITAE of different methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Method power (%)</th>
<th colspan="3" align="center">ITSE</th>
<th colspan="3" align="center">ITAE</th>
</tr>
<tr>
<th align="center">IHA</th>
<th align="center">FCPI</th>
<th align="center">ACPI</th>
<th align="center">IHA</th>
<th align="center">FCPI</th>
<th align="center">ACPI</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">5</td>
<td align="center">3.304e8</td>
<td align="center">3.657e8</td>
<td align="center">4.574e8</td>
<td align="center">3.976e6</td>
<td align="center">5.019e6</td>
<td align="center">7.290e6</td>
</tr>
<tr>
<td align="center">50</td>
<td align="center">8.046e6</td>
<td align="center">8.148e6</td>
<td align="center">8.614e6</td>
<td align="center">8.963e4</td>
<td align="center">9.517e4</td>
<td align="center">1.109e5</td>
</tr>
<tr>
<td align="center">100</td>
<td align="center">2.989e6</td>
<td align="center">3.085e6</td>
<td align="center">3.296e6</td>
<td align="center">4.208e4</td>
<td align="center">4.493e4</td>
<td align="center">5.078e4</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="fig" rid="F11">Figures 11</xref>, <xref ref-type="fig" rid="F12">12</xref>, <xref ref-type="fig" rid="F13">13</xref> depict the variation curves of the IHA controller parameters <inline-formula id="inf153">
<mml:math id="m180">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf154">
<mml:math id="m181">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf155">
<mml:math id="m182">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf156">
<mml:math id="m183">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> at different power levels. It is evident that the controller parameters adaptively change with the system&#x2019;s state during the control process. In theory, the integral coefficient <inline-formula id="inf157">
<mml:math id="m184">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> of the PI controller primarily works to reduce steady-state error, while the proportional coefficient Kp reflects the system&#x2019;s response speed, rapidly reducing error. Consequently, <inline-formula id="inf158">
<mml:math id="m185">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> has a noticeable effect towards the end of the control process, whereas <inline-formula id="inf159">
<mml:math id="m186">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>&#x2019;s impact is more pronounced in the early stages. In the parameter curve results, when the water level error is significant, the proportional coefficient <inline-formula id="inf160">
<mml:math id="m187">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> plays a major role. At such times, <inline-formula id="inf161">
<mml:math id="m188">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> assumes larger values across all three power levels: for example, from 10&#xa0;s to 200&#xa0;s at the 5% power level, from 10&#xa0;s to 37&#xa0;s at the 50% power level, and from 10&#xa0;s to 25&#xa0;s at the 100% power level. However, the integral coefficient shows no distinct pattern because it has little influence when the error is substantial. As the water level error decreases, the value of <inline-formula id="inf162">
<mml:math id="m189">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> decreases as well, reducing the likelihood of overshoot. For instance, this occurs during 200&#x2013;350&#xa0;s at the 5% power level, 37&#x2013;50&#xa0;s at the 50% power level, and 25&#x2013;40&#xa0;s at the 100% power level. When the error approaches zero, <inline-formula id="inf163">
<mml:math id="m190">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> stabilizes and does not have a fixed value, while <inline-formula id="inf164">
<mml:math id="m191">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf165">
<mml:math id="m192">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> generally assume larger values. This is because when the error is zero, <inline-formula id="inf166">
<mml:math id="m193">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> has minimal effect, and increasing the integral coefficient is beneficial for reducing steady-state error. Simultaneously, <inline-formula id="inf167">
<mml:math id="m194">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> does not exhibit a clear pattern throughout the control cycle. Furthermore, manual parameter adjustments reveal that <inline-formula id="inf168">
<mml:math id="m195">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> has little influence on the results within a certain range. Hence, under varying power levels, the proposed method can autonomously generate optimized strategies for the CPI controller parameters (<inline-formula id="inf169">
<mml:math id="m196">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf170">
<mml:math id="m197">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf171">
<mml:math id="m198">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf172">
<mml:math id="m199">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) based on the system&#x2019;s state. This ensures that the system can efficiently regulate the water level to the specified position in the shortest possible time, optimizing overall performance.</p>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>The changing curve of controller parameters at 5% power level. <bold>(A)</bold> Kp1; <bold>(B)</bold> Ki1; <bold>(C)</bold> Kp2; <bold>(D)</bold> Ki2.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g011.tif"/>
</fig>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>The changing curve of controller parameters at 50% power level. <bold>(A)</bold> Kp1; <bold>(B)</bold> Ki1; <bold>(C)</bold> Kp2; <bold>(D)</bold> Ki2.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g012.tif"/>
</fig>
<fig id="F13" position="float">
<label>FIGURE 13</label>
<caption>
<p>The changing curve of controller parameters at 100% power level. <bold>(A)</bold> Kp1; <bold>(B)</bold> Ki1; <bold>(C)</bold> Kp2; <bold>(D)</bold> Ki2.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g013.tif"/>
</fig>
<p>With the ACPI method, the control law can adapt to changes in power levels but struggles to adapt effectively to variations in water level states. Consequently, the ACPI method falls short of achieving an optimal control effect. The FCPI method, while capable of adjusting the control law adaptively with both power level and water level state, relies heavily on the design of fuzzy membership functions and fuzzy rules, which are inherently influenced by human experience. This design challenge makes it difficult to encompass all possible system states, so the FCPI method also struggles to achieve an optimal control effect. In contrast, the proposed method excels in achieving an ideal control effect across all power levels, primarily due to its efficient reinforcement learning mechanism. Throughout the control process, both gain and control law can adaptively evolve in response to changes in power levels and state information. During the learning process, the controller agent accumulates control experience continuously through repeated interactions with the environment. It autonomously learns from this experience and explores new strategies within the control policy space. Over time, the controller agent matures and evolves into a master of control, thus achieving exceptional control performance.</p>
</sec>
<sec id="s4-3">
<title>4.3 Test 2 anti-interference test</title>
<p>To assess the anti-interference capability of the proposed controller, we conducted a steam flow disturbance benchmark test on models at different power levels. During the test, a step disturbance in steam flow of &#x2b;35.88&#xa0;kg/s was introduced at 10&#xa0;s (<xref ref-type="bibr" rid="B16">Liu et al., 2010</xref>; <xref ref-type="bibr" rid="B3">Ansarifar et al., 2012</xref>). The test results are depicted in <xref ref-type="fig" rid="F14">Figures 14</xref>, <xref ref-type="fig" rid="F15">15</xref>, <xref ref-type="fig" rid="F16">16</xref>. From these figures, it is evident that all three methods exhibit strong anti-interference capabilities and swiftly restore the water level to its normal state. Moreover, the oscillation amplitude and adjustment time of the water level decrease as the power level increases, signifying more efficient water level control at higher power levels compared to lower ones. Notably, at the 5% power level, the proposed method restores the water level to its normal state in approximately 200&#xa0;s (<xref ref-type="fig" rid="F14">Figure 14A</xref>), outperforming the FCPI and ACPI methods. This rapid recovery demonstrates the exceptional anti-interference performance of the IHA controller across various power levels. In <xref ref-type="fig" rid="F14">Figures 14B</xref>&#x2013;<xref ref-type="fig" rid="F16">16B</xref>, we observe the changes in feed water flow and steam flow under the control of the IHA method. It is apparent that the feed water flow quickly tracks the variations in steam flow. However, due to the non-minimum phase characteristics of the system at low power levels, the system&#x2019;s recovery time is longer in this scenario.</p>
<fig id="F14" position="float">
<label>FIGURE 14</label>
<caption>
<p>Anti-interference test result at 5% power level. <bold>(A)</bold> water level change curve; <bold>(B)</bold> flow change curve.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g014.tif"/>
</fig>
<fig id="F15" position="float">
<label>FIGURE 15</label>
<caption>
<p>Anti-interference test result at 50% power level. <bold>(A)</bold> water level change curve; <bold>(B)</bold> flow change curve.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g015.tif"/>
</fig>
<fig id="F16" position="float">
<label>FIGURE 16</label>
<caption>
<p>Anti-interference test result at 100% power level. <bold>(A)</bold> water level change curve; <bold>(B)</bold> flow change curve.</p>
</caption>
<graphic xlink:href="fenrg-12-1341103-g016.tif"/>
</fig>
<p>
<xref ref-type="table" rid="T7">Table 7</xref> provides a comparison of ITSE and ITAE results in the anti-interference tests of different methods. At the 5% power level, the ITSE of the IHA method is 77.8% lower than that of FCPI and 34.2% lower than that of ACPI. Similarly, the ITAE of the IHA method is 84.4% lower than that of FCPI and 70.2% lower than that of ACPI. These results clearly demonstrate that the IHA method outperforms the other two methods significantly in terms of both ITSE and ITAE at the 5% power level. Conversely, the comparison results among the three methods show similarity at the 50% and 100% power levels, which aligns with the observations in <xref ref-type="fig" rid="F14">Figures 14A</xref>&#x2013;<xref ref-type="fig" rid="F16">16A</xref>.</p>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>The comparison results of ITSE and ITAE of different methods in the anti-interference test.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Method power (%)</th>
<th colspan="3" align="center">ITSE</th>
<th colspan="3" align="center">ITAE</th>
</tr>
<tr>
<th align="center">IHA</th>
<th align="center">FCPI</th>
<th align="center">ACPI</th>
<th align="center">IHA</th>
<th align="center">FCPI</th>
<th align="center">ACPI</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">5</td>
<td align="center">2.988e6</td>
<td align="center">1.350e7</td>
<td align="center">4.538e6</td>
<td align="center">1.597e5</td>
<td align="center">1.022e6</td>
<td align="center">5.361e5</td>
</tr>
<tr>
<td align="center">50</td>
<td align="center">4.436e4</td>
<td align="center">4.346e4</td>
<td align="center">4.343e4</td>
<td align="center">6.851e3</td>
<td align="center">7.393e3</td>
<td align="center">7.617e3</td>
</tr>
<tr>
<td align="center">100</td>
<td align="center">6.628e3</td>
<td align="center">7.167e3</td>
<td align="center">6.733e3</td>
<td align="center">2.204e3</td>
<td align="center">2.275e3</td>
<td align="center">2.222e3</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In summary, the proposed method exhibits a strong anti-interference effect, particularly evident at certain power levels. However, it does not consistently demonstrate clear advantages across all power levels. This limitation stems from the focus of this paper, which primarily investigated water level tracking tasks during the training process of deep reinforcement learning. The development of a comprehensive anti-interference strategy is a potential area for future optimization and research.</p>
</sec>
<sec id="s4-4">
<title>4.4 Test 3 comparison of research results with public literature</title>
<p>It is well known that the water level of the UTSG is most difficult to control at low power levels (<xref ref-type="bibr" rid="B7">Choi et al., 1989</xref>). In order to highlight the advantages of the proposed method at low power levels, we compare the water level tracking effect of the IHA controller at the 5% power level with results published in the literature; the test consists of adjusting the water level from 0&#xa0;mm to 100&#xa0;mm.</p>
<p>The setting time serves as the evaluation index, defined as the minimum time required for the water level to reach and stabilize within &#xb1;5% of the set value. The comparison results are shown in <xref ref-type="table" rid="T8">Table 8</xref>, from which we can see that the proposed method shortens the adjustment time to 375&#xa0;s, a considerable advantage over the other methods that fully embodies the advantages of reinforcement learning.</p>
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>Comparison of the setting time of the proposed method at the 5% power level with results from the public literature.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Methods</th>
<th align="center">Setting time (s)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Proposed method</td>
<td align="center">375</td>
</tr>
<tr>
<td align="center">SVR (<xref ref-type="bibr" rid="B12">Kavaklioglu, 2014</xref>)</td>
<td align="center">408</td>
</tr>
<tr>
<td align="center">FOPID (<xref ref-type="bibr" rid="B22">Salehi et al., 2019</xref>)</td>
<td align="center">&#x3e;400</td>
</tr>
<tr>
<td align="center">IMC (<xref ref-type="bibr" rid="B26">Tan, 2011</xref>)</td>
<td align="center">&#x3e;400</td>
</tr>
<tr>
<td align="center">DSMC (<xref ref-type="bibr" rid="B2">Ansarifar et al., 2011</xref>)</td>
<td align="center">&#x3e;800</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To underscore the merits of the proposed method at low power levels, we compare the water level tracking performance of the IHA controller at a power level of 5% with the findings of other published research. The experimental setting entails adjusting the water level from 0&#xa0;mm to 100&#xa0;mm. It is worth noting that the referenced investigations introduced methodologies such as SVR, FOPID, IMC, and DSMC, all of which featured test content and equipment models consistent with our current study. Consequently, this paper directly adopts their reported data for the purpose of comparison.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>Aiming at the water level control of UTSG, an intelligent controller IHA based on CPI controller and DRL is proposed in this paper, which does not require prior knowledge of the model&#x2019;s dynamic characteristics. Instead, it autonomously explores the model during the training process, gathers pertinent data, and subsequently leverages this experience to iteratively enhance control performance. Through extensive training, this approach yields a controller with commendable control performance and robustness. The primary contributions of this paper are outlined as follows:<list list-type="simple">
<list-item>
<p>(1) A new reward function is proposed to evaluate the control effect and improve the training quality. The results demonstrate significant improvements in training effectiveness, offering valuable insights for other analogous control systems.</p>
</list-item>
<list-item>
<p>(2) The DDPG algorithm is applied to learn the CPI control policy, enabling the algorithm to accumulate experience through continuous exploration of the environment, without heavy reliance on extensive expert experience. After continuous training, the model&#x2019;s performance stabilizes and ultimately converges to an ideal state, with convergence values reaching approximately &#x2212;220 for the 5% power level and about &#x2212;315 for the 50% power level.</p>
</list-item>
<list-item>
<p>(3) In the water level tracking test, at low, medium, and high power levels, the proposed method achieves rise times of 73.9&#xa0;s, 13.6&#xa0;s, and 16.4&#xa0;s, respectively. These results indicate superior control performance compared to other methods, and the controller parameters can be dynamically adjusted based on the system&#x2019;s state. When contrasted with outcomes from traditional control algorithms and publicly available literature, the substantial reduction in setting time clearly demonstrates the advantages of the proposed method.</p>
</list-item>
<list-item>
<p>(4) In the anti-interference test, at low power levels, the IHA controller can restore the water level to its normal state within 200&#xa0;s, which is considerably faster than other methods. Additionally, the feed water flow promptly adapts to variations in steam flow, effectively mitigating the impact of steam flow disturbances on the water level.</p>
</list-item>
</list>
</p>
<p>In summary, the controller proposed in this paper demonstrates effective control across various power levels, as reinforcement learning autonomously learns optimization strategies for controller parameters without relying on expert knowledge. However, it is crucial to acknowledge that the designed control method has been exclusively validated on the steam generator model presented in this paper, yielding favorable results. Its efficacy has not been verified for water level control in other steam generator models, presenting a challenge for our team to address in the future. Given the operational similarities among different steam generator models, our team aims to transfer the acquired control strategies to other models through imitation learning, thereby achieving the migration of advanced control strategies.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/Supplementary Material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7">
<title>Author contributions</title>
<p>BP: Methodology, Software, Validation, Writing&#x2013;original draft, Writing&#x2013;review and editing. XM: Supervision, Validation, Writing&#x2013;review and editing. HX: Funding acquisition, Resources, Validation, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This project was supported by the China North Artificial Intelligence &#x26; Innovation Research Institute and the Natural Science Foundation of Heilongjiang Province, China (Grant No. E2017023).</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>Author XM was employed by China Nuclear Power Engineering Co, Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11">
<title>Abbreviations</title>
<p>ACPI, Adaptive Cascaded Proportional-Integral; CPI, Cascaded Proportional-Integral; DDPG, Deep Deterministic Policy Gradients; DRL, Deep reinforcement learning; FCPI, Fuzzy Cascaded Proportional-Integral; IHA, Intelligent hierarchical autonomous; ITAE, Integral of time weighted absolute error; ITSE, Integral of time weighted squared error; PID, Proportional-Integral-Derivative; PWR, Pressurized Water Reactors; ResNet, Residual neural network; RL, Reinforcement learning; UTSG, U-Tube Steam Generator.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmmed</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Akhter</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Rezaul Karim</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Sabbir Ahamed</surname>
<given-names>F. A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Genetic algorithm based PID parameter optimization</article-title>. <source>Am. J. Intelligent Syst.</source> <volume>10</volume> (<issue>1</issue>), <fpage>8</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.5923/j.ajis.20201001.02</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ansarifar</surname>
<given-names>G. R.</given-names>
</name>
<name>
<surname>Davilu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Talebi</surname>
<given-names>H. A.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Gain scheduled dynamic sliding mode control for nuclear steam generators</article-title>. <source>Prog. Nucl. Energy</source> <volume>53</volume>, <fpage>651</fpage>&#x2013;<lpage>663</lpage>. <pub-id pub-id-type="doi">10.1016/j.pnucene.2011.04.029</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ansarifar</surname>
<given-names>G. R.</given-names>
</name>
<name>
<surname>Talebi</surname>
<given-names>H. A.</given-names>
</name>
<name>
<surname>Davilu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Adaptive estimator-based dynamic sliding mode control for the water level of nuclear steam generators</article-title>. <source>Prog. Nucl. Energy</source> <volume>56</volume>, <fpage>61</fpage>&#x2013;<lpage>70</lpage>. <pub-id pub-id-type="doi">10.1016/j.pnucene.2011.12.008</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Aulia</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Yustin</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Hilman</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Annisa</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Wibowo</surname>
<given-names>E. W. K.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Fuzzy gain scheduling for cascaded PI-control for DC motor</article-title>,&#x201d; in <conf-name>5th IEEE Conference on Energy Conversion, CENCON 2021</conf-name>, <conf-loc>Johor Bahru, Malaysia</conf-loc>, <conf-date>25-25 October 2021</conf-date>. <pub-id pub-id-type="doi">10.1109/CENCON51869.2021.9627292</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bi</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Evacuation route recommendation using auto-encoder and Markov decision process</article-title>. <source>Appl. Soft Comput. J.</source> <volume>84</volume>, <fpage>105741</fpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2019.105741</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carapu&#xe7;o</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Neves</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Horta</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Reinforcement learning applied to Forex trading</article-title>. <source>Appl. Soft Comput. J.</source> <volume>73</volume>, <fpage>783</fpage>&#x2013;<lpage>794</lpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2018.09.017</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Choi</surname>
<given-names>J. I.</given-names>
</name>
<name>
<surname>Meyer</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Lanning</surname>
<given-names>D. D.</given-names>
</name>
</person-group> (<year>1989</year>). <article-title>Automatic controller for steam generator water level during low power operation</article-title>. <source>Nucl. Eng. Des.</source> <volume>117</volume>, <fpage>263</fpage>&#x2013;<lpage>274</lpage>. <pub-id pub-id-type="doi">10.1016/0029-5493(89)90175-1</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Deep residual learning for image recognition</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-loc>Las Vegas, NV, USA</conf-loc>, <conf-date>27-30 June 2016</conf-date>, <fpage>770</fpage>&#x2013;<lpage>778</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2016.90</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Research on UAV balance control based on expert-fuzzy adaptive PID</article-title>,&#x201d; in <conf-name>Proceedings of 2020 IEEE International Conference on Advances in Electrical Engineering and Computer Applications, AEECA 2020</conf-name>, <conf-loc>Dalian, China</conf-loc>, <conf-date>25-27 August 2020</conf-date>. <pub-id pub-id-type="doi">10.1109/AEECA49918.2020.9213511</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Irving</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Miossec</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tassart</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1980</year>). <source>Towards efficient full automatic operation of the pwr steam generator with water level adaptive control</source>, <fpage>309</fpage>&#x2013;<lpage>329</lpage>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chai</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>C. Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A signal compensation based cascaded PI control for an industrial heat exchange system</article-title>. <source>Control Eng. Pract.</source> <volume>98</volume>, <fpage>104372</fpage>. <pub-id pub-id-type="doi">10.1016/j.conengprac.2020.104372</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kavaklioglu</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Support vector regression model based predictive control of water level of U-tube steam generators</article-title>. <source>Nucl. Eng. Des.</source> <volume>278</volume>, <fpage>651</fpage>&#x2013;<lpage>660</lpage>. <pub-id pub-id-type="doi">10.1016/j.nucengdes.2014.08.018</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kong</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Geng</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Performance optimization of a steam generator level control system via a revised simplex search-based data-driven optimization methodology</article-title>. <source>Processes</source> <volume>10</volume>, <fpage>264</fpage>. <pub-id pub-id-type="doi">10.3390/pr10020264</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>An</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Design of a fuzzy-PID controller for a nonlinear hydraulic turbine governing system by using a novel gravitational search algorithm based on Cauchy mutation and mass weighting</article-title>. <source>Appl. Soft Comput. J.</source> <volume>52</volume>, <fpage>290</fpage>&#x2013;<lpage>305</lpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2016.10.035</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Lillicrap</surname>
<given-names>T. P.</given-names>
</name>
<name>
<surname>Hunt</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Pritzel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Heess</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Erez</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Tassa</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). &#x201c;<article-title>Continuous control with deep reinforcement learning</article-title>,&#x201d; in <conf-name>4th International Conference on Learning Representations, ICLR 2016 - Conference Track Proceedings</conf-name>.</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>F. Y.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>P controller with partial feed forward compensation and decoupling control for the steam generator water level</article-title>. <source>Nucl. Eng. Des.</source> <volume>240</volume>, <fpage>181</fpage>&#x2013;<lpage>190</lpage>. <pub-id pub-id-type="doi">10.1016/j.nucengdes.2009.09.014</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maghfiroh</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ramelan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Adriyanto</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Fuzzy-PID in BLDC motor speed control using MATLAB/Simulink</article-title>. <source>J. Robotics Control (JRC)</source> <volume>3</volume>, <fpage>8</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.18196/jrc.v3i1.10964</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mnih</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Kavukcuoglu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Silver</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Rusu</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Veness</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bellemare</surname>
<given-names>M. G.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Human-level control through deep reinforcement learning</article-title>. <source>Nature</source> <volume>518</volume>, <fpage>529</fpage>&#x2013;<lpage>533</lpage>. <pub-id pub-id-type="doi">10.1038/nature14236</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Calculate of an additional resistance with reverse flow in steam generator under steady-state conditions</article-title>. <source>Ann. Nucl. Energy</source> <volume>198</volume>, <fpage>110302</fpage>. <pub-id pub-id-type="doi">10.1016/j.anucene.2023.110302</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rodriguez-Abreo</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Rodriguez-Resendiz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fuentes-Silva</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Hernandez-Alvarado</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Falcon</surname>
<given-names>M. D. C. P. T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Self-tuning neural network PID with dynamic response control</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>65206</fpage>&#x2013;<lpage>65215</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3075452</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Safarzadeh</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Khaki-Sedigh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shirani</surname>
<given-names>A. S.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Identification and robust water level control of horizontal steam generators using quantitative feedback theory</article-title>. <source>Energy Convers. Manag.</source> <volume>52</volume>, <fpage>3103</fpage>&#x2013;<lpage>3111</lpage>. <pub-id pub-id-type="doi">10.1016/j.enconman.2011.04.023</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Salehi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Safarzadeh</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Kazemi</surname>
<given-names>M. H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Fractional order PID control of steam generator water level for nuclear steam supply systems</article-title>. <source>Nucl. Eng. Des.</source> <volume>342</volume>, <fpage>45</fpage>&#x2013;<lpage>59</lpage>. <pub-id pub-id-type="doi">10.1016/j.nucengdes.2018.11.040</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sen Peng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y. K.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Research on intelligent fault diagnosis method for nuclear power plant based on correlation analysis and deep belief network</article-title>. <source>Prog. Nucl. Energy</source> <volume>108</volume>, <fpage>419</fpage>&#x2013;<lpage>427</lpage>. <pub-id pub-id-type="doi">10.1016/j.pnucene.2018.06.003</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sui</surname>
<given-names>Z. G.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X. Y.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Numerical investigation of the thermal-hydraulic characteristics of AP1000 steam generator U-tubes</article-title>. <source>Int. J. Adv. Nucl. React. Des. Technol.</source> <volume>2</volume>, <fpage>52</fpage>&#x2013;<lpage>59</lpage>. <pub-id pub-id-type="doi">10.1016/j.jandt.2020.09.001</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sutton</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>McAllester</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mansour</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Policy gradient methods for reinforcement learning with function approximation</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>12</volume>, <fpage>1057</fpage>&#x2013;<lpage>1063</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tan</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Water level control for a nuclear steam generator</article-title>. <source>Nucl. Eng. Des.</source> <volume>241</volume>, <fpage>1873</fpage>&#x2013;<lpage>1880</lpage>. <pub-id pub-id-type="doi">10.1016/j.nucengdes.2010.12.010</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Thomas</surname>
<given-names>P. S.</given-names>
</name>
<name>
<surname>Brunskill</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Policy gradient methods for reinforcement learning with function approximation and action-dependent baselines</article-title>. <comment>arXiv preprint arXiv:1706.06643. Available at: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.48550/arXiv.1706.06643">https://doi.org/10.48550/arXiv.1706.06643</ext-link>.</comment>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Uhlenbeck</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Ornstein</surname>
<given-names>L. S.</given-names>
</name>
</person-group> (<year>1930</year>). <article-title>On the theory of the Brownian motion</article-title>. <source>Phys. Rev.</source> <volume>36</volume>, <fpage>823</fpage>&#x2013;<lpage>841</lpage>. <pub-id pub-id-type="doi">10.1103/PhysRev.36.823</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Dynamic modeling of AP1000 steam generator for control system design and simulation</article-title>. <source>Ann. Nucl. Energy</source> <volume>109</volume>, <fpage>648</fpage>&#x2013;<lpage>657</lpage>. <pub-id pub-id-type="doi">10.1016/j.anucene.2017.05.016</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Reinforcement learning for building controls: the opportunities and challenges</article-title>. <source>Appl. Energy</source> <volume>269</volume>, <fpage>115036</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2020.115036</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Torque control of DC torque motor based on expert PID</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>1626</volume>, <fpage>012073</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/1626/1/012073</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Neural network PID control for combustion instability</article-title>. <source>Combust. Theory Model.</source> <volume>26</volume>, <fpage>383</fpage>&#x2013;<lpage>398</lpage>. <pub-id pub-id-type="doi">10.1080/13647830.2022.2025908</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Control design of the wave compensation system based on the genetic PID algorithm</article-title>. <source>Adv. Mater. Sci. Eng.</source> <volume>2019</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1155/2019/2152914</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Fuzzy PID control of the three-degree-of-freedom parallel mechanism based on genetic algorithm</article-title>. <source>Appl. Sci. Switz.</source> <volume>12</volume> (<issue>21</issue>), <fpage>11128</fpage>. <pub-id pub-id-type="doi">10.3390/app122111128</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>