<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Therm. Eng.</journal-id>
<journal-title>Frontiers in Thermal Engineering</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Therm. Eng.</abbrev-journal-title>
<issn pub-type="epub">2813-0456</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1391602</article-id>
<article-id pub-id-type="doi">10.3389/fther.2024.1391602</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Thermal Engineering</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Home energy management strategy to schedule multiple types of loads and energy storage device with consideration of user comfort: a deep reinforcement learning based approach</article-title>
<alt-title alt-title-type="left-running-head">Pan et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fther.2024.1391602">10.3389/fther.2024.1391602</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Pan</surname>
<given-names>Tingzhe</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2666589/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhu</surname>
<given-names>Zean</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Luo</surname>
<given-names>Hongxuan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Chao</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jin</surname>
<given-names>Xin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Meng</surname>
<given-names>Zijie</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cai</surname>
<given-names>Xinlei</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Southern Power Grid Research Institute Co., Ltd.</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Power Dispatch Control Center of Guangdong Power Grid Co., Ltd.</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2382006/overview">Dibyendu Roy</ext-link>, Durham University, United Kingdom</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1712142/overview">Ghulam Hafeez</ext-link>, University of Engineering and Technology, Mardan, Pakistan</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2645561/overview">Mrinal Bhowmik</ext-link>, Durham University, United Kingdom</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2697786/overview">Sk Arafat Zaman</ext-link>, Indian Institute of Engineering Science and Technology, India</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Tingzhe Pan, <email>nanwangsouthern@163.com</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>05</day>
<month>06</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>4</volume>
<elocation-id>1391602</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>02</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>13</day>
<month>05</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Pan, Zhu, Luo, Li, Jin, Meng and Cai.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Pan, Zhu, Luo, Li, Jin, Meng and Cai</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>With the increase in the integration of renewable sources, the home energy management system (HEMS) has become a promising approach to improve grid energy efficiency and relieve network stress. In this context, this paper proposes an optimization dispatching strategy for HEMS to reduce total cost with full consideration of uncertainties, while ensuring the users&#x2019; comfort. Firstly, a HEMS dispatching model is constructed to reasonably schedule the start/stop time of the dispatchable appliances and energy storage system to minimize the total cost for home users. Besides, this dispatching strategy also controls the switching time of temperature-controlled load such as air conditioning to reduce the energy consumption while maintaining the indoor temperature in a comfortable level. Then, the optimal dispatching problem of HEMS is modeled as a Markov decision process (MDP) and solved by a deep reinforcement learning algorithm called deep deterministic policy gradient. The example results verify the effectiveness and superiority of the proposed method. The energy cost can be effectively reduced by 21.9% at least compared with other benchmarks and the indoor temperature can be well maintained.</p>
</abstract>
<kwd-group>
<kwd>home energy management system</kwd>
<kwd>dispatchable load</kwd>
<kwd>optimal dispatching strategy</kwd>
<kwd>users&#x2019; comfort</kwd>
<kwd>deep reinforcement learning</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Advancements in Cooling and Heating</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<sec id="s1-1">
<title>1.1 Background</title>
<p>The rapid increase in population growth and energy consumption has brought about many environmental problems such as global warming (<xref ref-type="bibr" rid="B27">Weil et al., 2023</xref>) and the energy crisis (<xref ref-type="bibr" rid="B9">Hafeez et al., 2020a</xref>). Among all energy consumption, household energy consumption is an important component (<xref ref-type="bibr" rid="B34">Zhang et al., 2023</xref>). To optimize the energy structure of households and reduce energy consumption, energy consuming equipment such as rooftop photovoltaics (PV), heat pumps, electric vehicles (EVs), and batteries have been widely promoted. With the rapid increase in the number of distributed PV (<xref ref-type="bibr" rid="B16">Li et al., 2024</xref>) and EVs (<xref ref-type="bibr" rid="B32">Yin and Qin, 2022</xref>), the home energy management system (HEMS) has become the most important aspect of achieving demand-side energy management in smart grids (<xref ref-type="bibr" rid="B10">Hafeez et al., 2021</xref>; <xref ref-type="bibr" rid="B13">Huy et al., 2023</xref>). The HEMS can make decisions for demand response based on current electricity prices, predicted photovoltaic output, user preferences, and device characteristics, achieving intelligent scheduling of home equipment and reducing electricity costs (<xref ref-type="bibr" rid="B14">Kikusato et al., 2019</xref>; <xref ref-type="bibr" rid="B7">Gomes et al., 2023</xref>). The HEMS is a key component in achieving zero-energy homes and has the potential for widespread application in residential distribution systems. The scheduling strategies used in HEMS mainly include real-time energy allocation, day-ahead scheduling, and closed-loop energy management. Among them, day-ahead scheduling can reduce computational complexity and improve computational efficiency, and is therefore widely accepted and applied (<xref ref-type="bibr" rid="B18">Ren et al., 2024</xref>).</p>
</sec>
<sec id="s1-2">
<title>1.2 Related works</title>
<p>Many related works have been conducted on HEMS. Liu et al. in (<xref ref-type="bibr" rid="B17">Liu et al., 2022</xref>) propose a HEMS for residential users that incorporates the uncertainty of data-driven results to achieve the best trade-off between electricity cost and the preference level. Tostado-V&#xe9;liz et al. in (<xref ref-type="bibr" rid="B21">Tostado-V&#xe9;liz et al., 2022</xref>) develop a HEMS that includes three novel demand response routines focused on peak clipping and demand flattening strategies. Chakir et al. in (<xref ref-type="bibr" rid="B3">Chakir et al., 2022</xref>) propose a management system for a future household equipped with controllable electric loads and an electric vehicle equipped with a PV&#x2013;Wind&#x2013;Battery hybrid renewable system connected to the national grid. However, these studies only consider the dispatch strategy of a single type of load, which may not be in line with real usage scenarios. In the real home energy system, there are multiple types of loads, such as dispatchable load and non-dispatchable load, and all these types of loads should be considered in the constructed system. To this end, Rehman et al. in (<xref ref-type="bibr" rid="B25">ur Rehman et al., 2022</xref>) proposed a holistic method to optimize the use of different types of home appliances according to the prosumers&#x2019; preferences and defined schedule. Dorahaki et al. in (<xref ref-type="bibr" rid="B5">Dorahaki et al., 2022</xref>) develop a behavioral home energy management model based on time-driven prospect theory incorporating energy storage devices, distributed energy resources, and smart flexible home appliances, which considers the dispatch of different types of appliances. Nezhad et al. 
in (<xref ref-type="bibr" rid="B6">Esmaeel Nezhad et al., 2021</xref>) propose a new model for the self-scheduling problem using a home energy management system (HEMS), considering the presence of different types of loads, such as an air conditioner and EV. When a temperature-controlled load such as an air conditioner is contained in the HEMS, the users&#x2019; comfort should be considered in the dispatch strategy. Song et al. in (<xref ref-type="bibr" rid="B20">Song et al., 2022</xref>) present an intelligent HEMS with three adjustable strategies to maximize economic benefits and consumers&#x2019; comfort. Youssef et al. in (<xref ref-type="bibr" rid="B33">Youssef et al., 2024</xref>) propose strategies that are evaluated in terms of consumer comfort and cost, with waiting time used to assess user comfort. Once the users&#x2019; comfort is taken into account, the single-objective optimization changes into a multi-objective optimization. It is difficult and important to balance the performance of different objectives to obtain the optimal dispatch strategy in the multi-objective optimization. To this end, several studies (<xref ref-type="bibr" rid="B24">Ullah et al., 2021</xref>; <xref ref-type="bibr" rid="B1">Alzahrani et al., 2023</xref>) have been proposed to tackle this problem.</p>
<p>Then, how to obtain the optimal dispatch strategy of the HEMS is a crucial problem (<xref ref-type="bibr" rid="B29">Xiong et al., 2024</xref>). Normally, optimization-based methods such as the stochastic programming method (SP) (<xref ref-type="bibr" rid="B12">Hussain et al., 2023</xref>) and the robust optimization method (RO) (<xref ref-type="bibr" rid="B26">Wang et al., 2024</xref>) are utilized to solve the optimization problem of HEMS. Tostado-V&#xe9;liz et al. in (<xref ref-type="bibr" rid="B23">Tostado-V&#xe9;liz et al., 2023a</xref>) develop a novel SP-based home energy management model considering negawatt trading. Kim et al. in (<xref ref-type="bibr" rid="B15">Kim et al., 2023</xref>) propose an SP-based algorithm to reduce computation time while preserving the stochastic properties of generated scenarios based on the Wasserstein-1 distance. Nevertheless, the SP-based method requires both vast computational ability and accurate distribution of random variables that may not be realized in practice (<xref ref-type="bibr" rid="B30">Xiong et al., 2023a</xref>). In this context, the RO-based methods are widely applied. Tostado-V&#xe9;liz et al. in (<xref ref-type="bibr" rid="B22">Tostado-V&#xe9;liz et al., 2023b</xref>) propose a fully robust home energy management model, which accounts for all the inherent uncertainties that may arise in domestic installations. Wang et al. in (<xref ref-type="bibr" rid="B26">Wang et al., 2024</xref>) propose a multi-objective two-stage robust optimization to address the inherent uncertainty of DES, aiming to concurrently realize energy savings, carbon emission reduction, and load smoothing. However, the optimization results calculated by the RO method are usually conservative and utilize only one dispatch solution to deal with all uncertainties of the whole dispatch period. 
To this end, the learning-based methods have been utilized to solve this problem (<xref ref-type="bibr" rid="B8">Hafeez et al., 2020b</xref>; <xref ref-type="bibr" rid="B2">Ben Slama and Mahmoud, 2023</xref>; <xref ref-type="bibr" rid="B18">Ren et al., 2024</xref>).</p>
<p>To bridge these gaps, this paper proposes an optimized scheduling model for home energy management to minimize the electricity cost with consideration of users&#x2019; comfort. Then, a novel deep reinforcement learning (DRL) based algorithm is utilized to deal with the uncertainties. The main contributions of this paper are summarized as follows:<list list-type="simple">
<list-item>
<p>1) This paper develops an optimized scheduling model for home energy management to schedule both interruptible load and uninterruptible load, which takes into consideration the time-of-use price and users&#x2019; comfort. Different from Refs. (<xref ref-type="bibr" rid="B3">Chakir et al., 2022</xref>; <xref ref-type="bibr" rid="B21">Tostado-V&#xe9;liz et al., 2022</xref>), the optimized strategy schedules multiple types of loads based on the time-of-use electricity price and the real-time energy storage system charging status, which can reduce user electricity costs while ensuring users&#x2019; comfort.</p>
</list-item>
<list-item>
<p>2) The optimization problem of the HEMS is modeled as a Markov decision process (MDP) and then solved by the deep deterministic policy gradient (DDPG) algorithm. Moreover, compared with the optimization-based methods in Refs. (<xref ref-type="bibr" rid="B12">Hussain et al., 2023</xref>; <xref ref-type="bibr" rid="B26">Wang et al., 2024</xref>; <xref ref-type="bibr" rid="B29">Xiong et al., 2024</xref>), the applied DDPG method can achieve fast decision making since the learned policy can be generalized to other situations without re-solving the optimization model after the agent is trained.</p>
</list-item>
</list>
</p>
<p>The remainder of this paper is organized as follows: The proposed system is described in detail in <xref ref-type="sec" rid="s2">Section 2</xref>. The mathematical modeling and optimization algorithm are discussed in <xref ref-type="sec" rid="s3">Section 3</xref>. <xref ref-type="sec" rid="s4">Section 4</xref> presents and analyzes the simulation results obtained for the proposed system. The paper concludes in <xref ref-type="sec" rid="s5">Section 5</xref>.</p>
</sec>
</sec>
<sec id="s2">
<title>2 System model</title>
<p>The modelled HEMS architecture is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. It can be seen that the constructed system includes HEMS, PV, energy storage and different types of loads. Note that the load can be divided into dispatchable load and non-dispatchable load. Besides, the dispatchable load can be further divided into interruptible load and uninterruptible load, which are specifically shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. The HEMS updates electricity prices, weather, and other information in real time. The HEMS controller is the core component of the entire system, which collects information from upper-level suppliers such as daily electricity prices and household load usage preferences, and calculates the most economical scheduling strategy based on various constraints. In this paper, the HEMS is modelled as a DRL agent for improving the control efficiency.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Structure of the constructed home energy system.</p>
</caption>
<graphic xlink:href="fther-04-1391602-g001.tif"/>
</fig>
<sec id="s2-1">
<title>2.1 PV model</title>
<p>To construct the model of PV, temperature and light radiation intensity are the key factors for determining the output of PV (<xref ref-type="bibr" rid="B31">Xiong et al., 2023b</xref>). These factors can be represented in the following model:<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf1">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the rated output of PV in the normal operating condition; <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the rated temperature under normal test conditions. <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the light radiation intensity, power temperature coefficient, and atmospheric temperature, respectively. The details of the parameters of the PV model are shown in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Parameters of PV model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Parameter</th>
<th align="center">Value</th>
<th align="center">Parameter</th>
<th align="center">Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">1,000 <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:mi mathvariant="normal">W</mml:mi>
<mml:mo>/</mml:mo>
<mml:msup>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">
<inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.005</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf9">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">25&#xb0;C</td>
<td align="center">
<inline-formula id="inf10">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">5.2&#xa0;kW</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-2">
<title>2.2 Load model</title>
<p>The household electricity load can be divided into dispatchable load, non-dispatchable load, and temperature-controlled load based on the degree of controllability (<xref ref-type="bibr" rid="B11">Hafeez et al., 2020c</xref>).</p>
<sec id="s2-2-1">
<title>2.2.1 Non-dispatchable load</title>
<p>The non-dispatchable load refers to a load that does not adjust its operating power or operating time, such as lighting fixtures, televisions, etc. Thus, the non-dispatchable load does not participate in scheduling, but is directly incorporated into the total energy consumption as an important load.</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Dispatchable load</title>
<p>Dispatchable load refers to the load with a certain time elasticity, which can participate in system dispatching, such as sweeping robots, dryers and other equipment. Dispatchable load can only be started and stopped within the specified operation time, and is switched off at all other times. The specific constraints are as follows:<disp-formula id="e2">
<mml:math id="m12">
<mml:mrow>
<mml:mfenced open="{" close="" separators="&#x7c;">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2209;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf11">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the auxiliary variable of the dispatchable load equipment. When <inline-formula id="inf12">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is 1, the equipment is turned on, and when <inline-formula id="inf13">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is 0, the equipment is turned off; <inline-formula id="inf14">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the starting and ending times of the operating range of dispatchable load equipment; <inline-formula id="inf15">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the rated working time of the dispatchable load; <italic>H</italic> is the number of sub-periods of equal length. In this article, a day is divided into 24 periods, that is, <italic>H</italic> is 24 and the unit time period is <inline-formula id="inf16">
<mml:math id="m18">
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Furthermore, the dispatchable load can be divided into interruptible load and uninterruptible load. The interruptible load can be modelled as:<disp-formula id="e3">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where the subscript <italic>in</italic> represents interruptible flexible loads; <inline-formula id="inf17">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the total rated energy consumption of device <italic>i</italic>; <inline-formula id="inf18">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the unit time power of device <italic>i</italic>.</p>
<p>The mathematical model for uninterruptible flexible loads is:<disp-formula id="e4">
<mml:math id="m22">
<mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where the subscript <italic>un</italic> represents uninterruptible loads; &#x3c4; is the time node. Eq. <xref ref-type="disp-formula" rid="e4">4</xref> indicates that if device <italic>i</italic> starts working at time &#x3c4;&#x2b;1, it must continue working for at least <inline-formula id="inf19">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> periods.</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Temperature-controlled load</title>
<p>Temperature-controlled load refers to household equipment with indirect energy storage characteristics, such as air conditioning. The comfort index for residents in this paper is indoor temperature, so the following constraints need to be met (<xref ref-type="bibr" rid="B4">Dongdong, 2020</xref>):<disp-formula id="e5">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf20">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the indoor temperature; <inline-formula id="inf21">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf22">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the minimum and maximum indoor temperatures allowed, respectively.</p>
<p>Due to changes in outdoor temperature, it is not possible to directly set the rated operating time of the air conditioner. Its thermodynamic model and working time model can be expressed as:<disp-formula id="e6">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>R</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi mathvariant="normal">C</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">C</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e7">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf23">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the outdoor temperature; <italic>C</italic> is the equivalent thermal capacitance; <italic>R</italic> is the equivalent thermal resistance; <inline-formula id="inf24">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi mathvariant="normal">C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the rated operating power of the air conditioner; <inline-formula id="inf25">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">C</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the operating status of the air conditioner throughout the entire working range, with the air conditioner on as 1 and the air conditioner off as 0; <inline-formula id="inf26">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf27">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the start and end times of the air conditioning operation interval; <inline-formula id="inf28">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>C</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the working time, determined by specific working conditions.</p>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 Battery model</title>
<p>Energy storage devices participate in scheduling through charging and discharging, balancing power fluctuations and improving system flexibility. This article reflects the remaining capacity of energy storage devices through the State of Charge (SOC), which can be expressed as:<disp-formula id="e8">
<mml:math id="m36">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>&#x3b7;</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>p</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>p</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mi>&#x3b7;</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m37">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
<disp-formula id="e10">
<mml:math id="m38">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>min</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf29">
<mml:math id="m39">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the SOC of the battery at the time-step <italic>t</italic>; <inline-formula id="inf30">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf31">
<mml:math id="m41">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the charge and discharge power of the battery at the time-step <italic>t</italic>; <inline-formula id="inf32">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b7;</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf33">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b7;</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the charge and discharge efficiency at the time-step <italic>t</italic>; <inline-formula id="inf34">
<mml:math id="m44">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>min</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf35">
<mml:math id="m45">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the minimum and maximum of the state of charge; <inline-formula id="inf36">
<mml:math id="m46">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the rated capacity of the battery. The parameters of the battery model are listed in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Parameters of battery model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Parameter</th>
<th align="center">Value</th>
<th align="center">Parameter</th>
<th align="center">Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<inline-formula id="inf37">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b7;</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.9</td>
<td align="center">
<inline-formula id="inf38">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b7;</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.9</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf39">
<mml:math id="m49">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>min</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.2</td>
<td align="center">
<inline-formula id="inf40">
<mml:math id="m50">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.9</td>
</tr>
<tr>
<td align="center">
<inline-formula id="inf41">
<mml:math id="m51">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">3&#xa0;kWh</td>
<td align="left"/>
<td align="left"/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-4">
<title>2.4 Problem formulation</title>
<p>To meet the power balance needs of household residents and to feed excess photovoltaic power into the grid, the HEMS needs to exchange energy with the power grid, which can be expressed as:<disp-formula id="e11">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">M</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi mathvariant="normal">C</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">C</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
<disp-formula id="e12">
<mml:math id="m53">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>where <inline-formula id="inf42">
<mml:math id="m54">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the total power of the load; <inline-formula id="inf43">
<mml:math id="m55">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf44">
<mml:math id="m56">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">M</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the power of dispatchable loads and non-dispatchable loads, respectively; <inline-formula id="inf45">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the interaction energy with the power grid; <inline-formula id="inf46">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the EV switch state.</p>
<p>This paper aims to minimize the total cost with consideration of comfort, so the optimization objective can be formulated as:<disp-formula id="e13">
<mml:math id="m59">
<mml:mrow>
<mml:mi>min</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
<disp-formula id="e14">
<mml:math id="m60">
<mml:mrow>
<mml:mfenced open="{" close="" separators="&#x7c;">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2a7e;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
<disp-formula id="e15">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf47">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the cost generated by the interaction energy between the system and the power grid; <inline-formula id="inf48">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the real-time electricity price prediction information (RTP) for the day ahead; <inline-formula id="inf49">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the electricity price for photovoltaic surplus electricity; <inline-formula id="inf50">
<mml:math id="m65">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a weight coefficient, which aims to balance the energy cost saving and maintenance of user&#x2019;s comfort during the optimization process. <inline-formula id="inf51">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the index of comfort; <inline-formula id="inf52">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the actual starting time of the electrical appliance; <inline-formula id="inf53">
<mml:math id="m68">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the desired starting time; <inline-formula id="inf54">
<mml:math id="m69">
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the allowed working time.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Applied deep reinforcement learning algorithm</title>
<p>In this paper, a novel DRL algorithm called deep deterministic policy gradient (DDPG) is applied to solve the optimization problem and to improve the solving efficiency (<xref ref-type="bibr" rid="B19">Shi et al., 2023</xref>).</p>
<sec id="s3-1">
<title>3.1 Formulate the optimization problem as an MDP</title>
<p>When applying the DRL algorithm, the optimization problem should first be modeled as a Markov Decision Process (MDP), which can be expressed as follows:</p>
<p>State set <italic>S</italic>: the state set is composed of the states of the agent at each time-step <italic>t</italic>, which can be represented as <inline-formula id="inf55">
<mml:math id="m70">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The state of the agent at each time-step <italic>t</italic> can be denoted as:<disp-formula id="e16">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>where <inline-formula id="inf56">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the non-dispatchable load at time-step <italic>t</italic>.</p>
<p>Action set <italic>A</italic>: the action set is composed of the actions of the agent at each time-step <italic>t</italic>, which can be represented as <inline-formula id="inf57">
<mml:math id="m73">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The action of the agent at each time-step <italic>t</italic> can be denoted as:<disp-formula id="e17">
<mml:math id="m74">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>where <inline-formula id="inf58">
<mml:math id="m75">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the switching variables of interruptible load, uninterruptible load and air conditioning, respectively; <inline-formula id="inf59">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the action of battery at the time-step; <inline-formula id="inf60">
<mml:math id="m77">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the action of the electric vehicle at the time-step.</p>
<p>Reward function <italic>R</italic>: The reward at time <italic>t</italic> <inline-formula id="inf61">
<mml:math id="m78">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents an immediate reward, which is obtained when the agent executes action <inline-formula id="inf62">
<mml:math id="m79">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> based on state information <inline-formula id="inf63">
<mml:math id="m80">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The real-time reward can be formulated as:<disp-formula id="e18">
<mml:math id="m81">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>Transition Probability <italic>P</italic>: once the current information (such as <inline-formula id="inf64">
<mml:math id="m82">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>) is determined, the probability of transitioning to the next state <inline-formula id="inf65">
<mml:math id="m83">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is fixed.</p>
</sec>
<sec id="s3-2">
<title>3.2 Applying the DDPG algorithm to solve the MDP</title>
<p>Then, the modeled MDP can be solved by the proposed DDPG algorithm to obtain the optimal dispatch strategy, which is illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>. The DDPG algorithm, as an advanced deep reinforcement learning algorithm, is very suitable for solving complex multidimensional optimization problems in continuous action spaces (<xref ref-type="bibr" rid="B35">Zheng et al., 2023</xref>). In the DDPG algorithm, the policy function maps the state to the expected output, while the critic function maps the state and action to the expected maximum output <italic>R</italic>
<sub>t</sub>, which maximizes the action value function <italic>Q</italic>
<sup>
<italic>&#x3c0;</italic>
</sup>(<italic>s</italic>
<sub>t</sub>, <italic>a</italic>
<sub>t</sub>). The calculation formula for the action value function <italic>Q</italic>
<sup>
<italic>&#x3c0;</italic>
</sup>(<italic>s</italic>
<sub>t</sub>, <italic>a</italic>
<sub>t</sub>) is as follows:<disp-formula id="e19">
<mml:math id="m84">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="double-struck">E</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:msub>
<mml:mi mathvariant="double-struck">E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>
</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Flow chart of the applied DDPG algorithm.</p>
</caption>
<graphic xlink:href="fther-04-1391602-g002.tif"/>
</fig>
<p>The DDPG algorithm is based on the actor critic framework, which consists of two main parts (actor network and critic network), with each part containing two networks (i.e., the main network and the target network). The actor network adjusts the value of the parameters <italic>&#x3b8;</italic>
<sup>
<italic>&#x3bc;</italic>
</sup> in the policy function <italic>&#x3bc;</italic>(<italic>s</italic>&#x7c;<italic>&#x3b8;</italic>
<sup>
<italic>&#x3bc;</italic>
</sup>) by fitting the current state to the corresponding actions. The critic network is used to adjust the value of the parameters <italic>&#x3b8;</italic>
<sup>
<italic>Q</italic>
</sup> in the action-value function <italic>Q</italic> (<italic>s,a</italic>&#x7c;<italic>&#x3b8;</italic>
<sup>
<italic>Q</italic>
</sup>).</p>
<p>The parameters <italic>&#x3b8;</italic>
<sup>
<italic>Q</italic>
</sup> in the critic network are updated by minimizing the value of the loss function <italic>L</italic>(<italic>&#x3b8;</italic>
<sup>
<italic>Q</italic>
</sup>), which is expressed as follows:<disp-formula id="e20">
<mml:math id="m85">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>where <inline-formula id="inf66">
<mml:math id="m86">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>In the actor network, the parameters <italic>&#x3b8;</italic>
<sup>
<italic>&#x3bc;</italic>
</sup> are updated through the policy gradient function as follows:<disp-formula id="e21">
<mml:math id="m87">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:msub>
<mml:msup>
<mml:mi>J</mml:mi>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:msup>
<mml:mo>&#x2248;</mml:mo>
<mml:msub>
<mml:mi mathvariant="double-struck">E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>&#x3b2;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:msub>
<mml:mi>Q</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mfenced open="" close="|" separators="&#x7c;">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="double-struck">E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>&#x3b2;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>a</mml:mi>
</mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
</mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:mfenced open="" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>where <italic>&#x3c1;</italic><sup><italic>&#x3b2;</italic></sup> represents the state distribution under the strategy <italic>&#x3b2;</italic>; <italic>&#x3b2;</italic> represents the specific strategy corresponding to the current policy <italic>&#x3c0;</italic>.</p>
<p>In order to improve the stability and reliability of the learning process of the DDPG algorithm, two different target networks are added to the actor network and the critic network, respectively. They are the target actor network <italic>&#x3bc;&#x27;</italic> (<italic>s</italic>&#x7c;<italic>&#x3b8;</italic>
<sup>
<italic>&#x3bc;&#x27;</italic>
</sup>) and the target critic network <italic>Q&#x27;</italic> (<italic>s</italic>, <italic>a</italic>&#x7c;<italic>&#x3b8;</italic>
<sup>
<italic>Q&#x27;</italic>
</sup>). In each iteration, the weight factors (<italic>&#x3b8;</italic>
<sup>
<italic>&#x3bc;&#x27;</italic>
</sup> and <italic>&#x3b8;</italic>
<sup>
<italic>Q&#x27;</italic>
</sup>) will be soft updated according to the following formulas:<disp-formula id="e22">
<mml:math id="m88">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
<mml:mtext>&#x2002;</mml:mtext>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="{" close="" separators="&#x7c;">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msup>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>where <italic>&#x3c4;</italic> represents the soft update coefficient, and <italic>&#x3c4;</italic> &#x226a; 1.</p>
<p>The specific training process of the proposed algorithm is described in <xref ref-type="statement" rid="Algorithm_1">Algorithm 1</xref>, which is shown below:</p>
<p>
<statement content-type="algorithm" id="Algorithm_1">
<label>Algorithm 1</label>
<p>Training procedures of proposed DDPG method.<list list-type="simple">
<list-item>
<p>1:&#x2003;<bold>Input:</bold> states of agent <inline-formula id="inf67">
<mml:math id="m89">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>2:&#x2003;<bold>Output:</bold> action of agent <inline-formula id="inf68">
<mml:math id="m90">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>3:&#x2003;<bold>Initialize:</bold> the weights of actor and critic networks <inline-formula id="inf69">
<mml:math id="m91">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf70">
<mml:math id="m92">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>; the weights of target networks <inline-formula id="inf71">
<mml:math id="m93">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2322;</mml:mo>
</mml:mover>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf72">
<mml:math id="m94">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2322;</mml:mo>
</mml:mover>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>4:&#x2003;<bold>for</bold> episode &#x3d; 1 <bold>to</bold> max episode <bold>do</bold>
</p>
</list-item>
<list-item>
<p>5:&#x2003;&#x2003;Initialize <bold>Environment</bold>
</p>
</list-item>
<list-item>
<p>6:&#x2003;&#x2003;<bold>for</bold> time step &#x3d; 1 <bold>to</bold> max step <bold>do</bold>
</p>
</list-item>
<list-item>
<p>7:&#x2003;&#x2003;&#x2003;Select action <inline-formula id="inf73">
<mml:math id="m95">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> based on <inline-formula id="inf74">
<mml:math id="m96">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>&#x3c0;</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="" close="|" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>8:&#x2003;&#x2003;&#x2003;Execute the actions and obtain the reward <inline-formula id="inf75">
<mml:math id="m97">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, observe the set of next state <inline-formula id="inf76">
<mml:math id="m98">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>9:&#x2003;&#x2003;&#x2003;Store the transition pair <inline-formula id="inf77">
<mml:math id="m99">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> in the replay buffer.</p>
</list-item>
<list-item>
<p>10:&#x2003;&#x2003;<bold>end for</bold>
</p>
</list-item>
<list-item>
<p>11:&#x2003;&#x2003;<bold>If</bold> time step &#x3e;&#x3d; update step <bold>do</bold>
</p>
</list-item>
<list-item>
<p>12:&#x2003;&#x2003;&#x2003;Sample a mini-batch transition from the replay buffer.</p>
</list-item>
<list-item>
<p>13:&#x2003;&#x2003;&#x2003;Minimize the loss function to update the weights of critic network <inline-formula id="inf78">
<mml:math id="m100">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> as Eq. <xref ref-type="disp-formula" rid="e20">20</xref> shows.</p>
</list-item>
<list-item>
<p>14:&#x2003;&#x2003;&#x2003;Update the weights of actor network <inline-formula id="inf79">
<mml:math id="m101">
<mml:mrow>
<mml:msup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> by computing the policy gradient based on Eq. <xref ref-type="disp-formula" rid="e21">21</xref>.</p>
</list-item>
<list-item>
<p>15:&#x2003;&#x2003;&#x2003;Update the weights of target networks based on Eq. <xref ref-type="disp-formula" rid="e22">22</xref>.</p>
</list-item>
<list-item>
<p>16:&#x2003;&#x2003;<bold>end if</bold>
</p>
</list-item>
<list-item>
<p>17:&#x2003;<bold>end for</bold>
</p>
</list-item>
</list>
</p>
</statement>
</p>
</sec>
</sec>
<sec id="s4">
<title>4 Case study</title>
<sec id="s4-1">
<title>4.1 Case setting</title>
<p>To verify the effectiveness of the proposed method, a smart home energy system is constructed. The simulation period is set as 1&#xa0;day with 24&#xa0;h from 00:00&#x2013;24:00. There are six dispatchable devices in the home, which are shown in <xref ref-type="table" rid="T3">Table 3</xref>. Note that the superscript &#x201c;&#x2a;&#x201d; in the first column of <xref ref-type="table" rid="T3">Table 3</xref> indicates that the household appliance is an uninterruptible load. The PV generation and non-dispatchable load are shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. The capacity of the battery is 3&#xa0;kWh, while the charging/discharging efficiency is 0.95. The minimum and maximum of the state of charge <inline-formula id="inf80">
<mml:math id="m102">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf81">
<mml:math id="m103">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are 0.2 and 0.9. To meet comfort constraints, the indoor temperature must be limited between 25&#xb0;C and 27&#xb0;C when the air-conditioning is running. The simulation model is constructed in MATLAB 2018b and the training procedure of DRL method is conducted in Python based on a workstation computer with 32&#xa0;GB RAM and Intel Core i9-10920X CPU.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Parameters of dispatchable load (<xref ref-type="bibr" rid="B28">WU et al., 2019</xref>).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Household appliances</th>
<th align="center">Power/kW</th>
<th align="center">Working interval</th>
<th align="center">Required working hours/h</th>
<th align="center">Minimum continuous working hours/h</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Washing machine&#x2a;</td>
<td align="center">1</td>
<td align="center">14:00&#x2013;21:00</td>
<td align="center">2</td>
<td align="center">2</td>
</tr>
<tr>
<td align="center">Rice cooker&#x2a;</td>
<td align="center">0.8</td>
<td align="center">06:00&#x2013;12:00</td>
<td align="center">1</td>
<td align="center">1</td>
</tr>
<tr>
<td align="center">Dishwasher&#x2a;</td>
<td align="center">0.73</td>
<td align="center">18:00&#x2013;24:00</td>
<td align="center">1</td>
<td align="center">1</td>
</tr>
<tr>
<td align="center">Electric vehicle</td>
<td align="center">2.5</td>
<td align="center">00:00&#x2013;08:00</td>
<td align="center">5</td>
<td align="center">1</td>
</tr>
<tr>
<td align="center">Electric bicycle</td>
<td align="center">0.9</td>
<td align="center">00:00&#x2013;08:00</td>
<td align="center">3</td>
<td align="center">1</td>
</tr>
<tr>
<td align="center">Sweeping robot</td>
<td align="center">0.35</td>
<td align="center">10:00&#x2013;18:00</td>
<td align="center">2</td>
<td align="center">1</td>
</tr>
<tr>
<td rowspan="2" align="center">Air-conditioning</td>
<td rowspan="2" align="center">2.1</td>
<td align="center">00:00&#x2013;07:00</td>
<td rowspan="2" align="center">-</td>
<td rowspan="2" align="center">1</td>
</tr>
<tr>
<td align="center">18:00&#x2013;24:00</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The daily generation data of PV and non-dispatchable load.</p>
</caption>
<graphic xlink:href="fther-04-1391602-g003.tif"/>
</fig>
</sec>
<sec id="s4-2">
<title>4.2 Optimization results obtained by the applied DDPG method</title>
<p>To obtain the optimal dispatch strategy of HEMS, the DDPG algorithm is applied. The hyper-parameters of the agent are set as shown in <xref ref-type="table" rid="T4">Table 4</xref>. The total number of training episodes is 8,000 to ensure convergence of the agent. Besides, the learning rates of the actor and critic networks are set as 0.002 and 0.001 to ensure the exploring ability and decision-making ability, respectively. The soft update coefficient and batch size are set as 0.001 and 256 to stabilize the training process.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Hyper-parameters settings of the applied DRL model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Parameter</th>
<th align="center">Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Train episodes</td>
<td align="center">8,000</td>
</tr>
<tr>
<td align="center">Learning rate of actor network</td>
<td align="center">0.002</td>
</tr>
<tr>
<td align="center">Learning rate of critic network</td>
<td align="center">0.001</td>
</tr>
<tr>
<td align="center">Soft update coefficient</td>
<td align="center">0.001</td>
</tr>
<tr>
<td align="center">Batch size</td>
<td align="center">256</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>At each episode, the agent gets the current state from the constructed home energy system, and then gives the decided action back. The change of the reward of the applied DRL method over the whole training process is illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref>. It can be observed that the reward stays in a low range with an average value of &#x2212;21 in the first 2000 episodes, which indicates that the agent cannot find the optimal policy for HEMS dispatching. Then, the reward rises gradually to &#x2212;14 and then converges to &#x2212;13 after the ceaseless interaction between the agent and the environment, which means the agent can obtain a better strategy for dispatching the system.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Reward curve during the training episodes.</p>
</caption>
<graphic xlink:href="fther-04-1391602-g004.tif"/>
</fig>
<p>After the agent is well-trained, the optimal energy management strategy for HES can be obtained. The results of the dispatch optimization for devices are presented in <xref ref-type="fig" rid="F5">Figure 5</xref>. The needs of non-dispatchable devices are satisfied first. Then, the dispatchable devices should be dispatched with consideration of the real-time electricity price and the permitted working interval of each device. It can be observed that all the uninterruptible devices are scheduled at a relatively low price point to reduce the total cost. For example, the working time-points of the washing machine are scheduled at 19:00 and 20:00 because of the low price. Thus, the dispatch strategy of the uninterruptible devices is quite reasonable.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>The optimization result of the dispatchable and non-dispatchable devices.</p>
</caption>
<graphic xlink:href="fther-04-1391602-g005.tif"/>
</fig>
<p>Furthermore, the interruptible devices can be dispatched at discontinuous time-points, so their dispatch strategy can be more flexible. When dispatching the interruptible devices, the system cost should be the first and only factor considered. It can be observed that the EV and Ebike are scheduled to charge during 00:00&#x2013;06:00 because of the lower electricity price. Therefore, both the interruptible and uninterruptible devices can be reasonably scheduled after the agent is well-trained, which means that the proposed method can effectively realize the HEM optimal operation.</p>
<p>When dispatching the air-conditioning device, the comfort factor should be taken into account. The indoor temperature changes like a non-linear process when the air-conditioning is working. Thus, the air-conditioning does not need to work continuously with consideration of cost saving. The dispatching result of air-conditioning and the indoor temperature are shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. Note that the comfort constraint is only set between 00:00&#x2013;07:00 and 18:00&#x2013;24:00. It can be observed that the air-conditioning is scheduled to work for 5&#xa0;hours to keep the indoor temperature between 25&#xb0;C and 27&#xb0;C. As the temperature curve shows, the indoor temperature always stays between 25&#xb0;C and 27&#xb0;C, which indicates the comfort constraint can be well satisfied.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Simulation curves of indoor temperature.</p>
</caption>
<graphic xlink:href="fther-04-1391602-g006.tif"/>
</fig>
<p>Generally, the energy storage device can store electricity during lower electricity price periods and release it during higher price periods to reduce system costs. Thus, an energy storage device is equipped in this paper. The SOC curve of the applied energy storage device is illustrated in <xref ref-type="fig" rid="F7">Figure 7</xref>. It can be found that the energy storage device charges when the electricity price is low and discharges when the price is high, which can effectively reduce the system cost. Hence, the results effectively demonstrate that the proposed approach can efficiently schedule the energy storage device in real-time to reduce the operating cost after the agent is well-trained.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>SOC curve of the test day.</p>
</caption>
<graphic xlink:href="fther-04-1391602-g007.tif"/>
</fig>
</sec>
<sec id="s4-3">
<title>4.3 Comparison results with other benchmarks</title>
<p>The above results have verified the effectiveness of the proposed method. To further verify its effectiveness and superiority, the proposed method is separately compared with the optimization method based on stochastic programming (SP) and the optimization method based on deterministic optimization (DO) (<xref ref-type="bibr" rid="B1">Alzahrani et al., 2023</xref>). The difference between SP and DO is that DO only considers the optimization problem in deterministic scenarios and does not consider the uncertainties of PV and loads.</p>
<p>The optimization results of the three algorithms are shown in <xref ref-type="table" rid="T5">Table 5</xref>. Compared to traditional optimization methods, the proposed method can better cope with the uncertainty of PV output and load demand to achieve better optimization results. It can be observed that the proposed method achieves the lowest total cost compared with the other two methods, with the total operation cost reduced by at least 21.9%. The proposed method can reasonably schedule the different types of appliances to reduce the cost of purchasing electricity and improve the revenue from selling electricity. Besides, the proposed method can maintain the highest comfort for the home users by reasonably dispatching the switching time of the air-conditioning. The DO method solves the modelled optimization problem under deterministic conditions, and its final optimization effect is not significantly different from that of the proposed method. This also fully demonstrates the effectiveness of the proposed method. However, the DO method cannot address the issue of output uncertainty and is not applicable to actual operating conditions. Therefore, the proposed method is more suitable for optimizing the operation of the HES in uncertain environments.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Comparison results of different methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Total cost/RMB</th>
<th align="center">Cost of purchasing electricity/RMB</th>
<th align="center">Revenue from selling electricity/RMB</th>
<th align="center">Average comfort</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">SP</td>
<td align="center">34.73</td>
<td align="center">50.21</td>
<td align="center">15.48</td>
<td align="center">0.79</td>
</tr>
<tr>
<td align="center">DO</td>
<td align="center">32.78</td>
<td align="center">45.63</td>
<td align="center">12.85</td>
<td align="center">0.86</td>
</tr>
<tr>
<td align="center">Proposed</td>
<td align="center">26.89</td>
<td align="center">40.60</td>
<td align="center">13.71</td>
<td align="center">0.89</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>This paper proposes an optimized scheduling model for home energy management to minimize the costs of household users with consideration of user comfort. To enhance solution efficiency, a novel DRL-based algorithm called DDPG is applied to solve the optimization problem. Firstly, the results show that the proposed method can effectively dispatch both interruptible and uninterruptible loads, so the total cost of the household user is obviously reduced while maintaining high comfort. The optimal dispatch problem of the HEMS is modeled as an MDP and solved by the DDPG algorithm. The agent has converged after 8,000 episodes of training, which means that the proposed DRL method can obtain the optimal policy for dispatching the HEMS. In future work, a multi-agent deep reinforcement learning algorithm will be used to improve the efficiency of model training and decision making.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusion of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7">
<title>Author contributions</title>
<p>TP: Conceptualization, Software, Writing&#x2013;original draft. ZZ: Conceptualization, Data curation, Formal Analysis, Writing&#x2013;review and editing. HL: Conceptualization, Investigation, Methodology, Software, Writing&#x2013;original draft. CL: Investigation, Methodology, Project administration, Resources, Writing&#x2013;original draft. XJ: Conceptualization, Methodology, Writing&#x2013;review and editing. ZM: Conceptualization, Methodology, Supervision, Writing&#x2013;review and editing. XC: Conceptualization, Investigation, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by the Southern Power Grid Corporation Technology Project under Grant 036000KK52222004 (GDKJXM20222117).</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>Authors TP, HL, and XJ were employed by Southern Power Grid Research Institute Co., Ltd. Authors ZZ, CL, ZM, and XC were employed by Power Dispatch Control Center of Guangdong Power Grid Co., Ltd.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alzahrani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rahman</surname>
<given-names>M. U.</given-names>
</name>
<name>
<surname>Hafeez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Rukh</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ali</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Murawwat</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>A strategy for multi-objective energy optimization in smart grid considering renewable energy and batteries energy storage system</article-title>. <source>IEEE Access</source> <volume>11</volume>, <fpage>33872</fpage>&#x2013;<lpage>33886</lpage>. <pub-id pub-id-type="doi">10.1109/access.2023.3263264</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ben Slama</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mahmoud</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A deep learning model for intelligent home energy management system using renewable energy</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>123</volume>, <fpage>106388</fpage>. <pub-id pub-id-type="doi">10.1016/j.engappai.2023.106388</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chakir</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Abid</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tabaa</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hachimi</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Demand-side management strategy in a smart home using electric vehicle and hybrid renewable energy system</article-title>. <source>Energy Rep.</source> <volume>8</volume>, <fpage>383</fpage>&#x2013;<lpage>393</lpage>. <pub-id pub-id-type="doi">10.1016/j.egyr.2022.07.018</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dongdong</surname>
<given-names>Y. G. M. Z. Z. L. L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Home energy management strategy for Co-scheduling of electric vehicle and energy storage device</article-title>. <source>Proc. CSU-EPSA</source> <volume>32</volume>, <fpage>25</fpage>&#x2013;<lpage>33</lpage>.</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dorahaki</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rashidinejad</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fatemi Ardestani</surname>
<given-names>S. F.</given-names>
</name>
<name>
<surname>Abdollahi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Salehizadeh</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A home energy management model considering energy storage and smart flexible appliances: a modified time-driven prospect theory approach</article-title>. <source>J. Energy Storage</source> <volume>48</volume>, <fpage>104049</fpage>. <pub-id pub-id-type="doi">10.1016/j.est.2022.104049</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Esmaeel Nezhad</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rahimnejad</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gadsden</surname>
<given-names>S. A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Home energy management system for smart buildings with inverter-based air conditioning system</article-title>. <source>Int. J. Electr. Power &#x26; Energy Syst.</source> <volume>133</volume>, <fpage>107230</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijepes.2021.107230</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gomes</surname>
<given-names>I. L. R.</given-names>
</name>
<name>
<surname>Ruano</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Ruano</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>MILP-based model predictive control for home energy management systems: a real case study in Algarve, Portugal</article-title>. <source>Energy Build.</source> <volume>281</volume>, <fpage>112774</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2023.112774</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hafeez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Alimgeer</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2020b</year>). <article-title>Electric load forecasting based on deep learning and optimized by heuristic algorithm in smart grid</article-title>. <source>Appl. Energy</source> <volume>269</volume>, <fpage>114915</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2020.114915</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hafeez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Alimgeer</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Wadud</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Usman</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Qazi</surname>
<given-names>A. B.</given-names>
</name>
<etal/>
</person-group> (<year>2020a</year>). <article-title>An innovative optimization strategy for efficient energy management with day-ahead demand response signal and energy consumption forecasting in smart grid using artificial neural network</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>84415</fpage>&#x2013;<lpage>84433</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.2989316</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hafeez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Jan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>I. A.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Derhab</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A novel hybrid load forecasting framework with intelligent feature engineering and optimization algorithm in smart grid</article-title>. <source>Appl. Energy</source> <volume>299</volume>, <fpage>117178</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2021.117178</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hafeez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wadud</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>I. U.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Shafiq</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Usman</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2020c</year>). <article-title>Efficient energy management of IoT-enabled smart homes under price-based demand response program in smart grid</article-title>. <source>Sensors</source> <volume>20</volume> (<issue>11</issue>), <fpage>3155</fpage>. <pub-id pub-id-type="doi">10.3390/s20113155</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hussain</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Imran Azim</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Eicker</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Multi-stage optimization for energy management and trading for smart homes considering operational constraints of a distribution network</article-title>. <source>Energy Build.</source> <volume>301</volume>, <fpage>113722</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2023.113722</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huy</surname>
<given-names>T. H. B.</given-names>
</name>
<name>
<surname>Truong Dinh</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ngoc Vo</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Real-time energy scheduling for home energy management systems with an energy storage system and electric vehicle based on a supervised-learning-based strategy</article-title>. <source>Energy Convers. Manag.</source> <volume>292</volume>, <fpage>117340</fpage>. <pub-id pub-id-type="doi">10.1016/j.enconman.2023.117340</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kikusato</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mori</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yoshizawa</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Fujimoto</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Asano</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hayashi</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Electric vehicle charge&#x2013;discharge management for utilization of photovoltaic by coordination between home and grid energy management systems</article-title>. <source>IEEE Trans. Smart Grid</source> <volume>10</volume> (<issue>3</issue>), <fpage>3186</fpage>&#x2013;<lpage>3197</lpage>. <pub-id pub-id-type="doi">10.1109/tsg.2018.2820026</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Jeong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Stochastic optimization of home energy management system using clustered quantile scenario reduction</article-title>. <source>Appl. Energy</source> <volume>349</volume>, <fpage>121555</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2023.121555</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Olawoore</surname>
<given-names>V. S.</given-names>
</name>
<name>
<surname>Shuai</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Numerical model study on influences of photovoltaic plants on local microclimate</article-title>. <source>Renew. Energy</source> <volume>221</volume>, <fpage>119551</fpage>. <pub-id pub-id-type="doi">10.1016/j.renene.2023.119551</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xing</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A home energy management system incorporating data-driven uncertainty-aware user preference</article-title>. <source>Appl. Energy</source> <volume>326</volume>, <fpage>119911</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2022.119911</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Nie</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A data-driven DRL-based home energy management system optimization framework considering uncertain household parameters</article-title>. <source>Appl. Energy</source> <volume>355</volume>, <fpage>122258</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2023.122258</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>K. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>DDPG-based load frequency control for power systems with renewable energy by DFIM pumped storage hydro unit</article-title>. <source>Renew. Energy</source> <volume>218</volume>, <fpage>119274</fpage>. <pub-id pub-id-type="doi">10.1016/j.renene.2023.119274</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Multi-objective optimization strategy for home energy management system including PV and battery energy storage</article-title>. <source>Energy Rep.</source> <volume>8</volume>, <fpage>5396</fpage>&#x2013;<lpage>5411</lpage>. <pub-id pub-id-type="doi">10.1016/j.egyr.2022.04.023</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tostado-V&#xe9;liz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ar&#xe9;valo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kamel</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zawbaa</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Jurado</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Home energy management system considering effective demand response strategies and uncertainties</article-title>. <source>Energy Rep.</source> <volume>8</volume>, <fpage>5256</fpage>&#x2013;<lpage>5271</lpage>. <pub-id pub-id-type="doi">10.1016/j.egyr.2022.04.006</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tostado-V&#xe9;liz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hasanien</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Turky</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Rezaee Jordehi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mansouri</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Jurado</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>A fully robust home energy management model considering real time price and on-board vehicle batteries</article-title>. <source>J. Energy Storage</source> <volume>72</volume>, <fpage>108531</fpage>. <pub-id pub-id-type="doi">10.1016/j.est.2023.108531</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tostado-V&#xe9;liz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rezaee Jordehi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hasanien</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Turky</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Jurado</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>A novel stochastic home energy management system considering negawatt trading</article-title>. <source>Sustain. Cities Soc.</source> <volume>97</volume>, <fpage>104757</fpage>. <pub-id pub-id-type="doi">10.1016/j.scs.2023.104757</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ullah</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Hafeez</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Jan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Javaid</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A multi-objective energy optimization in smart grid with high penetration of renewable energy sources</article-title>. <source>Appl. Energy</source> <volume>299</volume>, <fpage>117104</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2021.117104</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>ur Rehman</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Yaqoob</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Adil Khan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Optimal power management framework for smart homes using electric vehicles and energy storage</article-title>. <source>Int. J. Electr. Power &#x26; Energy Syst.</source> <volume>134</volume>, <fpage>107358</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijepes.2021.107358</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Robust energy management through aggregation of flexible resources in multi-home micro energy hub</article-title>. <source>Appl. Energy</source> <volume>357</volume>, <fpage>122471</fpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2023.122471</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weil</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Bibri</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Longchamp</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Golay</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Alahi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Urban digital twin challenges: a systematic review and perspectives for sustainable smart cities</article-title>. <source>Sustain. Cities Soc.</source> <volume>99</volume>, <fpage>104862</fpage>. <pub-id pub-id-type="doi">10.1016/j.scs.2023.104862</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>H. W. C.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Home energy system optimization based on time-of-use price and real-time control strategy of battery</article-title>. <source>Power Syst. Prot. Control</source> <volume>47</volume> (<issue>19</issue>), <fpage>23</fpage>&#x2013;<lpage>30</lpage>.</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Hybrid robust-stochastic optimal scheduling for multi-objective home energy management with the consideration of uncertainties</article-title>. <source>Energy</source> <volume>290</volume>, <fpage>130047</fpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2023.130047</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>Coordinated volt/VAR control for photovoltaic inverters: a soft actor-critic enhanced droop control approach</article-title>. <source>Int. J. Electr. Power &#x26; Energy Syst.</source> <volume>149</volume>, <fpage>109019</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijepes.2023.109019</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2023b</year>). <article-title>Coordinated energy management strategy for multi-energy hub with thermo-electrochemical effect based power-to-ammonia: a multi-agent deep reinforcement learning enabled approach</article-title>. <source>Renew. Energy</source> <volume>214</volume>, <fpage>216</fpage>&#x2013;<lpage>232</lpage>. <pub-id pub-id-type="doi">10.1016/j.renene.2023.05.067</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yin</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Cooperative optimization strategy for large-scale electric vehicle charging and discharging</article-title>. <source>Energy</source> <volume>258</volume>, <fpage>124969</fpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2022.124969</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Youssef</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kamel</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hassan</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Safaraliev</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A smart home energy management approach incorporating an enhanced northern goshawk optimizer to enhance user comfort, minimize costs, and promote efficient energy consumption</article-title>. <source>Int. J. Hydrogen Energy</source> <volume>49</volume>, <fpage>644</fpage>&#x2013;<lpage>658</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijhydene.2023.10.174</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Chan</surname>
<given-names>A. P. C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Developing smart buildings to reduce indoor risks for safety and health of the elderly: a systematic and bibliometric analysis</article-title>. <source>Saf. Sci.</source> <volume>168</volume>, <fpage>106310</fpage>. <pub-id pub-id-type="doi">10.1016/j.ssci.2023.106310</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hartikainen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>DDPG based LADRC trajectory tracking control for underactuated unmanned ship under environmental disturbances</article-title>. <source>Ocean. Eng.</source> <volume>271</volume>, <fpage>113667</fpage>. <pub-id pub-id-type="doi">10.1016/j.oceaneng.2023.113667</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>