<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mech. Eng</journal-id>
<journal-title>Frontiers in Mechanical Engineering</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mech. Eng</abbrev-journal-title>
<issn pub-type="epub">2297-3079</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">840310</article-id>
<article-id pub-id-type="doi">10.3389/fmech.2022.840310</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Mechanical Engineering</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Modeling and Predicting Heavy-Duty Vehicle Engine-Out and Tailpipe Nitrogen Oxide (<italic>NO</italic>
<sub>
<italic>x</italic>
</sub>) Emissions Using Deep Learning</article-title>
<alt-title alt-title-type="left-running-head">Pillai et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">Modeling NOx Using Deep Learning</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Pillai</surname>
<given-names>Rinav</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1607142/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Triantopoulos</surname>
<given-names>Vassilis</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1679828/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Berahas</surname>
<given-names>Albert S.</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1617837/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Brusstar</surname>
<given-names>Matthew</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1608218/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sun</surname>
<given-names>Ruonan</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1624250/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nevius</surname>
<given-names>Tim</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1679339/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Boehman</surname>
<given-names>Andr&#xe9; L.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">
<sup>&#x2a;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1450091/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Mechanical Engineering</institution>, <institution>University of Michigan</institution>, <addr-line>Ann Arbor</addr-line>, <addr-line>MI</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Plasma Science and Fusion Center</institution>, <institution>Massachusetts Institute of Technology</institution>, <addr-line>Cambridge</addr-line>, <addr-line>MA</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Industrial and Operations Engineering</institution>, <institution>University of Michigan</institution>, <addr-line>Ann Arbor</addr-line>, <addr-line>MI</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>National Vehicle and Fuel Emissions Laboratory</institution>, <institution>U.S. EPA</institution>, <addr-line>Ann Arbor</addr-line>, <addr-line>MI</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Horiba Instruments Inc.</institution>, <addr-line>Saline</addr-line>, <addr-line>MI</addr-line>, <country>United&#x20;States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1072915/overview">Weiqi Ji</ext-link>, Robert Bosch, United&#x20;States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1609453/overview">Florian Vom Lehn</ext-link>, RWTH Aachen University, Germany</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1609902/overview">Weiyu Cao</ext-link>, Rivian Automotive LLC, United&#x20;States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1495056/overview">Opeoluwa Owoyele</ext-link>, Louisiana State University, United&#x20;States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Andr&#xe9; L. Boehman, <email>boehman@umich.edu</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Engine and Automotive Engineering, a section of the journal Frontiers in Mechanical Engineering</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>03</day>
<month>03</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>8</volume>
<elocation-id>840310</elocation-id>
<history>
<date date-type="received">
<day>21</day>
<month>12</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>02</day>
<month>02</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Pillai, Triantopoulos, Berahas, Brusstar, Sun, Nevius and Boehman.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Pillai, Triantopoulos, Berahas, Brusstar, Sun, Nevius and Boehman</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<abstract>
<p>As emissions regulations for transportation become stricter, it is increasingly important to develop accurate nitrogen oxide (<italic>NO</italic>
<sub>
<italic>x</italic>
</sub>) emissions models for heavy-duty vehicles. However, estimation of transient <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions using physics-based models is challenging due to its highly dynamic nature, which arises from the complex interactions between power demand, engine operation, and exhaust aftertreatment efficiency. As an alternative to physics-based models, a multi-dimensional data-driven approach is proposed as a framework to estimate <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions across an extensive set of representative engine and exhaust aftertreatment system operating conditions. This paper employs Deep Neural Networks (DNN) to develop two models, an engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> and a tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model, to predict heavy-duty vehicle <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. The DNN models were developed using variables that are available from On-board Diagnostics from two datasets, an engine dynamometer and a chassis dynamometer dataset. Results from trained DNN models using the engine dynamometer dataset showed that the proposed approach can predict <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions with high accuracy, where <italic>R</italic>
<sup>2</sup> scores are higher than 0.99 for both engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models on cold/hot Federal Test Procedure (FTP) and Ramped Mode Cycle (RMC) data. Similarly, the engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models using the chassis dynamometer dataset achieved <italic>R</italic>
<sup>2</sup> scores of 0.97 and 0.93, respectively. All models developed in this study have a mean absolute error percentage of approximately 1% relative to maximum <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> in the datasets, which is comparable to that of physical <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions measurement analyzers. The input feature importance studies conducted in this work indicate that high accuracy DNN models (<italic>R</italic>
<sup>2</sup> &#x3d; 0.92&#x2013;0.95) could be developed by utilizing minimal significant engine and aftertreatment inputs. This study also demonstrates that DNN <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions models can be very effective tools for fault detection in Selective Catalytic Reduction (SCR) systems.</p>
</abstract>
<kwd-group>
<kwd>heavy-duty vehicles</kwd>
<kwd>nitrogen oxide emissions</kwd>
<kwd>data-driven modelling</kwd>
<kwd>deep learning</kwd>
<kwd>artificial neural networks</kwd>
<kwd>optimization</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Heavy-duty vehicles employ compression ignition engines due to their high power density, reliability and powertrain efficiency. Even with the anticipated changes in Greenhouse Gas regulations, diesel engine-powered trucks will continue to be used in heavy-duty transportation for several years, especially in the legacy fleet (<xref ref-type="bibr" rid="B22">EPA, 2021b</xref>). Also, the heavy-duty transportation sector is more challenging to electrify due to the need for high energy storage, fast charging rates and high ranges for long-haul movement of goods (<xref ref-type="bibr" rid="B6">Askin et&#x20;al., 2015</xref>; <xref ref-type="bibr" rid="B14">Brown et&#x20;al., 2020</xref>). However, diesel engines emit significant amounts of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> (nitric oxide and nitrogen dioxide) which is designated as a criteria pollutant by the EPA (<xref ref-type="bibr" rid="B58">Winkler et&#x20;al., 2018</xref>), and has been shown to cause respiratory illness such as asthma and chronic lung disease upon prolonged exposure. <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> is also a contributor to the formation of smog, acid rain and ozone at ground levels (<xref ref-type="bibr" rid="B12">Boningari and Smirniotis, 2016</xref>). Stringent emissions regulations have therefore been put in place to curb vehicular <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions (<xref ref-type="bibr" rid="B22">EPA, 2021b</xref>). This has put tremendous pressure on the diesel engine industry to design and develop technologies that limit <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions from the engine and from the tailpipe using exhaust aftertreatment systems. Accurate estimation of instantaneous engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions has therefore become essential to improve engine control strategies for <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> reduction. From a regulations perspective, accurate models for tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> predictions are important in understanding the potential for future emissions reductions, and as a tool for identifying possible modes of non-compliance during in-use operation.</p>
<p>Formation of engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> is the result of complex chemical reactions at high temperature within the combustion chamber, and therefore strongly depends on the engine operating condition. On the other hand, tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions are highly dependent on the performance of the Selective Catalytic Reduction (SCR) aftertreatment system. Past studies have made use of thermophysical and chemical models to estimate <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. <xref ref-type="bibr" rid="B40">Mentink et&#x20;al. (2017)</xref> developed a virtual engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> sensor using a physics-based nitric oxide (<italic>NO</italic>) formation model and an empirical correlation to determine nitrogen dioxide (<italic>NO</italic>
<sub>2</sub>) fraction of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>. A semi-empirical two-zone model was developed by <xref ref-type="bibr" rid="B47">Provataris et&#x20;al. (2017)</xref> that made use of measured in-cylinder pressure data and a physics-based model to estimate <italic>NO</italic> formation in the combustion chamber. <xref ref-type="bibr" rid="B15">Camporeale et&#x20;al. (2017)</xref> used in-cylinder pressure sensor signal to create a grey-box <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> raw emissions model. The model uses combustion parameters such as adiabatic flame temperature and heat release rate to estimate engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. Multiple studies have also used computational fluid dynamics models to model the changes in temperature and composition in the combustion chamber to better estimate <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions production (<xref ref-type="bibr" rid="B42">Mobasheri et&#x20;al., 2012</xref>; <xref ref-type="bibr" rid="B18">Dahifale and Patil, 2017</xref>). These models use first principles to estimate <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions and therefore can achieve high extrapolation capabilities. However, they require high computational time and cost, large number of assumptions and the need for laborious manual configuration for different engines.</p>
<p>In the past several years, increasingly large quantities of data are being collected through engine and chassis dynamometer laboratory tests due to complex powertrain units with a greater number of actuators and finer control. This has led to growing interest in the use of machine learning to develop predictive models for <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. Using machine learning, accurate data-driven models can be developed without requiring explicit solution to the governing equations that describe the physics of the system. <xref ref-type="bibr" rid="B45">Selvam et&#x20;al. (2021)</xref> used measurements from On-board Diagnostics (OBD) sensors to calculate combustion variables like adiabatic flame temperature, oxygen concentration and combustion time. These variables were then used as inputs to an ensemble based method called Random Forests to estimate engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions for five different heavy-duty engines. The model was evaluated to have an average <italic>R</italic>
<sup>2</sup> value of 0.72 and a mean absolute error (MAE) of 78 parts per million (ppm). A hybrid model consisting of a physics-based model and a machine learning approach was proposed by <xref ref-type="bibr" rid="B43">Mohammad et&#x20;al. (2021)</xref>. The model combined a physical and chemical model developed in GT-Suite with a Support Vector Machine and Feed-Forward Artificial Neural Network (FFNN). The model was validated using 772 steady state operating points for a 13L heavy-duty diesel engine and showed good accuracy with an <italic>R</italic>
<sup>2</sup> score of 0.99 and root mean square error (RMSE) of 23&#xa0;ppm. However, this model was not tested on transient operating conditions. <xref ref-type="bibr" rid="B31">Johri and Filipi (2014)</xref> describes the development of a Neuro-Fuzzy Model to predict transient <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> and soot emissions. This model divides the problem into multiple sub-problems which are individually identified using a simpler class of models. Polynomial and neural network models were used as choices for the local models with validity functions that determine the regions of input space where the local model is active. The model was tested on United States Federal city-driving schedule (FTP75) cycle data for a 6.4L heavy-duty engine. The model predictions were in good agreement with the total cumulative <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> measured over the test&#x20;cycle.</p>
<p>Deep Learning has been shown to be adept at discovering intricate structures in high-dimensional data and has applications in various domains such as science, business and government (<xref ref-type="bibr" rid="B37">LeCun et&#x20;al., 2015</xref>; <xref ref-type="bibr" rid="B29">Goodfellow et&#x20;al., 2016</xref>). A virtual <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> sensor using Recurrent Neural Network (RNN) was proposed by <xref ref-type="bibr" rid="B5">Arsie et&#x20;al. (2013)</xref>. Data for training was collected by running different test cycles like the New European Driving Cycle (NEDC) on a 1.3L light-duty diesel engine. Engine Control Unit (ECU) variables including engine speed, air mass flow, boost pressure, fuel mass, start of injection (SOI) and air fuel ratio (AFR) were used as inputs to the network. Pruning techniques were used to improve generalization capability of the model. The authors reported <italic>R</italic>
<sup>2</sup> values between 0.83&#x2013;0.91 for different test sets with RMSE values ranging from 47&#x2013;122&#xa0;ppm. <xref ref-type="bibr" rid="B25">Fischer (2013)</xref> developed a virtual <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> sensor for a 2.2L light-duty diesel engine using Self-Organizing Map algorithm&#x2014;a type of ANN which makes use of a selector and estimator layer. Six input parameters including engine speed, fuel quantity, lambda, air mass flow, boost pressure and exhaust gas temperature were used to estimate engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>. The model showed good accuracy on the Artemis Urban Test Cycle with an error of 1.57% between the total measured and predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> over the cycle. <xref ref-type="bibr" rid="B61">Zhang et&#x20;al. (2015)</xref> proposed using an ANN model to predict engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions using ECU variables like engine speed, torque, injection timing, air flow rate, rail pressure and oil temperature as inputs. The train and test data consisted of a &#x201c;Chirp&#x201d; cycle, Hot and Cold Start drive cycles collected from a 2L light-duty diesel engine. Total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> estimated from the model deviated on average by about 5.8% from the measured total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> over the test cycle. <xref ref-type="bibr" rid="B9">Bellone et&#x20;al. (2020)</xref> compared a Convolutional Neural Network (CNN) and Long-Short Term Memory (LSTM) network models to predict engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emission, soot and fuel consumption for a heavy-duty 8L diesel engine. Input parameters included ECU variables such as engine speed, fuel flow/cyl., injection angle, rail pressure, wastegate position, exhaust gas recirculation (EGR) position, exhaust temperature, main, post and pre-injection quantity, inlet pressure, pre-injection angle and throttle position. The CNN model captured 98.64% of the total test cycle <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions with an <italic>R</italic>
<sup>2</sup> of 0.993, while the LSTM model captured 99% of the total test cycle <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions with an <italic>R</italic>
<sup>2</sup> of&#x20;0.995.</p>
<p>
<xref ref-type="bibr" rid="B51">Shin et&#x20;al. (2020)</xref> developed an engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions model for 1.6L light-duty diesel engine using Deep Neural Networks (DNN) by training the model on the Worldwide Harmonized Light Vehicles Test Procedure (WLTP) cycle. They used Bayesian hyperparameter optimization to find the optimal DNN architecture. The accuracy of the model was indicated by an <italic>R</italic>
<sup>2</sup> value of 0.9675 and MAE of 17&#xa0;ppm using 14 input variables from the ECU. <xref ref-type="bibr" rid="B60">Yu et&#x20;al. (2021)</xref> presented a method for estimating tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions by complete ensemble empirical model decomposition with adaptive noise and an LSTM network. They used on-road data from the OBD sensors of a diesel bus to train the network. They reported good model accuracy with an <italic>R</italic>
<sup>2</sup> value of 0.98 with RMSE of 46.11&#xa0;ppm on the test data. A steady state engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions model was proposed by <xref ref-type="bibr" rid="B38">Lee et&#x20;al. (2021)</xref> using a DNN model. The model used 8 ECU parameters including engine speed, brake mean effective pressure, EGR rate, air mass, fuel mass, injection timing, boost pressure and injection pressure as inputs. 696 steady state conditions were evaluated using the model with good accuracy indicated by an <italic>R</italic>
<sup>2</sup> of&#x20;0.98.</p>
<p>However, the previously conducted studies have developed DNN models for engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions without taking into account the effect of SCR performance which is an essential component affecting overall <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> production. With the light-duty industry expected to be increasingly electrified in the near future, more emphasis also needs to be placed on developing accurate models for estimation of heavy-duty diesel engine <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions - both engine-out and tailpipe. Additionally, many of the models also make use of ECU variables, such as fuel injection timing, swirl ratio, and injection angle (<xref ref-type="bibr" rid="B9">Bellone et&#x20;al., 2020</xref>; <xref ref-type="bibr" rid="B51">Shin et&#x20;al., 2020</xref>; <xref ref-type="bibr" rid="B38">Lee et&#x20;al., 2021</xref>), which may not be readily available except with proprietary access. Deep Learning models can be highly accurate, but are inherently considered to be black-box models, and therefore it is difficult to interpret their predictions. However, it is crucial to understand for example why <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions are higher than expected under given engine operating conditions. Multiple papers have used percent of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> captured or total test cycle <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error as an evaluation metric for <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction using DNN (<xref ref-type="bibr" rid="B25">Fischer, 2013</xref>; <xref ref-type="bibr" rid="B31">Johri and Filipi, 2014</xref>; <xref ref-type="bibr" rid="B9">Bellone et&#x20;al., 2020</xref>). The limitations of using this metric in evaluating DNN model accuracy for predicting <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions have been discussed in the current work and new improved instantaneous error metrics for this application have been proposed.</p>
<p>This work tries to address the aforementioned shortcomings of the existing work in the literature by developing accurate engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models using DNN. The DNN models were trained and tested using two different datasets - an engine-aftertreatment dynamometer dataset and a chassis dynamometer dataset on two 6.7L heavy-duty bus engines (different model years) using non-proprietary variables that are available from the OBD as model inputs. The outline and contributions of this work are as follows:<list list-type="simple">
<list-item>
<p>&#x2022; Application of DNN to develop engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions models for heavy-duty diesel engines using physics inspired inputs readily available from the OBD, while demonstrating high accuracy on both train and test datasets.</p>
</list-item>
<list-item>
<p>&#x2022; Development of a DNN model for tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions using SCR aftertreatment information such as SCR inlet and outlet temperatures and exhaust mass flow rate that captures the effect of SCR performance on tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions.</p>
</list-item>
<list-item>
<p>&#x2022; Analysis and development of holistic error metrics that help visualize instantaneous as well as total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions prediction errors of DNN <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models over the DNN training process.</p>
</list-item>
<list-item>
<p>&#x2022; Interpretability study of models to enhance the physical understanding of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions estimation using DNN. Evaluation of model accuracy using minimal number of &#x201c;relatively important&#x201d; input parameters physically affecting production of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions, thereby illustrating DNN model interpretation of complex transient <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions.</p>
</list-item>
<list-item>
<p>&#x2022; Detailed analysis of a potential application of developed DNN models to fault detection in SCR aftertreatment systems.</p>
</list-item>
</list>
</p>
<sec id="s1-1">
<title>Organization</title>
<p>The paper is organized as follows. In <xref ref-type="sec" rid="s2">Section 2</xref> we discuss engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> formation, our choice of input features, and the data. Our research methodology is described in <xref ref-type="sec" rid="s3">Section 3</xref>. In <xref ref-type="sec" rid="s4">Section 4</xref> we present our results followed by an in depth discussion of input feature importance and potential application of developed DNN models in <xref ref-type="sec" rid="s5">Section 5</xref>. We conclude with final remarks in <xref ref-type="sec" rid="s6">Section&#x20;6</xref>.</p>
</sec>
</sec>
<sec id="s2">
<title>2 Input Features and Data Description</title>
<p>In this section, the thermophysical and chemical phenomena affecting production of engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions are explained, which lays the groundwork for the selection of the input parameters for each model. A detailed description of the datasets used to train and test the DNN models is also provided.</p>
<sec id="s2-1">
<title>2.1 Input Feature Selection</title>
<p>In the current study, a DNN model using physics inspired features as inputs has been developed. Therefore, significant engine and vehicle parameters from literature that affect engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> formation were measured in the tests conducted to develop the datasets used to train the DNN models, while also taking into consideration their ease of availability from vehicle OBD. A brief description of engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> formation has been provided in the following sections to explain the different inputs selected for each&#x20;model.</p>
<sec id="s2-1-1">
<title>2.1.1&#x20;Engine-Out NOx Formation</title>
<p>
<italic>NO</italic>
<sub>
<italic>x</italic>
</sub> is composed of nitric oxide (<italic>NO</italic>) and nitrogen dioxide (<italic>NO</italic>
<sub>2</sub>). Diesel engine <italic>NO</italic> formation is described by three mechanisms - Thermal <italic>NO</italic>, Prompt <italic>NO</italic> and Fuel <italic>NO</italic> (<xref ref-type="bibr" rid="B30">Heywood, 2019</xref>). Prompt <italic>NO</italic> is formed in fuel-rich conditions and is not highly temperature dependent. Fuel <italic>NO</italic> formation is dependent on the presence of nitrogen-based compounds in the fuel. However, the primary mechanism for diesel <italic>NO</italic> formation within the combustion chamber is defined by the Extended Zeldovich Mechanism (<xref ref-type="bibr" rid="B36">Lavoie et&#x20;al., 1970</xref>) referred to as Thermal <italic>NO</italic>. Thermal <italic>NO</italic> formation is due to oxidation of nitrogen in the air. The principal reactions governing the formation of Thermal <italic>NO</italic> are given by<disp-formula id="e1">
<mml:math id="m1">
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>O</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mi>O</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>N</mml:mi>
</mml:math>
<label>(1)</label>
</disp-formula>
<disp-formula id="e2">
<mml:math id="m2">
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mi>O</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>O</mml:mi>
</mml:math>
<label>(2)</label>
</disp-formula>
<disp-formula id="e3">
<mml:math id="m3">
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>O</mml:mi>
<mml:mi>H</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mi>O</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>.</mml:mo>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>These reactions (<xref ref-type="disp-formula" rid="e1">(1)</xref>, <xref ref-type="disp-formula" rid="e2">(2)</xref>, <xref ref-type="disp-formula" rid="e3">(3)</xref>) are highly dependent on the combustion temperature (&#x3e; 2000 K), in-cylinder oxygen (<italic>O</italic>
<sub>2</sub>) concentrations, and residence time of the reacting mixture at peak temperatures and lean air-fuel mixtures (<xref ref-type="bibr" rid="B13">Bowman, 1975</xref>). <italic>NO</italic>
<sub>2</sub> on the other hand is formed due to partial oxidation of <italic>NO</italic> further downstream of the cylinder which can be explained by the following reaction <xref ref-type="disp-formula" rid="e4">(4)</xref> (<xref ref-type="bibr" rid="B41">Merryman and Levy, 1975</xref>):<disp-formula id="e4">
<mml:math id="m5">
<mml:mtable class="align" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mi>N</mml:mi>
<mml:mi>O</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>H</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>O</mml:mi>
<mml:mi>H</mml:mi>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>Engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> formation is primarily controlled by the temperature of the burned gas and <italic>O</italic>
<sub>2</sub> concentration in the combustion chamber. These parameters vary based on different engine operating and control variables such as intake air mass flow rate, fuel flow rate, intake manifold temperature and pressure, engine speed and load. Therefore, these variables were selected as inputs for modeling engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>. Exhaust gas recirculation (EGR) is an important engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> control strategy typically employed on diesel engines. The introduction of EGR, which is composed primarily of nitrogen (<italic>N</italic>
<sub>2</sub>), carbon dioxide (<italic>CO</italic>
<sub>2</sub>), and water (<italic>H</italic>
<sub>2</sub>
<italic>O</italic>), displaces air in the cylinder, and results in lower <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> formation. The primary mechanisms for the decrease in <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> formation due to EGR are the reduction in the mixture&#x2019;s oxygen concentration, and decrease in the combustion temperatures due to presence of higher specific heat capacity triatomic molecules. As a result, EGR mass flow rate was also included as an input to the DNN when available.</p>
</sec>
<sec id="s2-1-2">
<title>2.1.2 Tailpipe NOx Emissions</title>
<p>Tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions are highly dependent on the performance of the SCR. SCR uses ammonia (<italic>NH</italic>
<sub>3</sub>) in the form of aqueous urea as a <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> reduction reagent to convert <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> to <italic>N</italic>
<sub>2</sub> and <italic>H</italic>
<sub>2</sub>
<italic>O</italic>. However, the dominant catalytic reactions require a certain optimal temperature range (520&#x2013;725&#xa0;K) (<xref ref-type="bibr" rid="B32">Khair and Majewski, 2006</xref>). There are three important SCR reactions given by (<xref ref-type="bibr" rid="B34">Koebel et&#x20;al., 2002</xref>):<disp-formula id="equ1">
<mml:math id="m6">
<mml:mtable class="align-star" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mn>4</mml:mn>
<mml:mi>N</mml:mi>
<mml:mi>O</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>4</mml:mn>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>4</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>6</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
</mml:mrow>
<mml:mn>5</mml:mn>
<mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mn>4</mml:mn>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>N</mml:mi>
<mml:mi>O</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>4</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>6</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
</mml:mrow>
<mml:mn>6</mml:mn>
<mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mn>4</mml:mn>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>3</mml:mn>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>3.5</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>6</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mi>O</mml:mi>
<mml:mo>.</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
</mml:mrow>
<mml:mn>7</mml:mn>
<mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
</p>
<p>The standard SCR reaction (5) occurs in a <italic>NO</italic> dominant environment and requires high temperature. At lower SCR temperatures, a faster conversion, reaction (6), takes place using equimolar amounts of <italic>NO</italic> and <italic>NO</italic>
<sub>2</sub>. This helps to improve SCR performance at lower SCR temperatures. However, an excess of <italic>NO</italic>
<sub>2</sub> results in a slower reaction rate, reaction (7). Therefore, SCR conversion efficiency is highly dependent on the ratio of <italic>NO</italic>
<sub>2</sub> to <italic>NO</italic> entering the&#x20;SCR.</p>
<p>The SCR performance is greatly influenced by the residence time available for reactants in the optimal temperature range, which is a function of the space velocity. The space velocity of the reactor is a function of the measured exhaust gas flow rate through the SCR. The rate of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> removal is also dependent on the inlet <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> concentration, as <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> levels determine the rate of reactions (<xref ref-type="bibr" rid="B30">Heywood, 2019</xref>). Therefore, exhaust aftertreatment variables such as SCR inlet and outlet temperatures, exhaust mass flow rate and measured engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> were considered as inputs to the tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> DNN model. Coolant temperature was also included as an input to the model to provide information regarding cold start or hot start conditions for the test cycles. Other variables affecting SCR performance such as urea injection quantity and timing, and ammonia (<italic>NH</italic>
<sub>3</sub>) slip were not considered, as they are not readily available from the OBD data without proprietary access.</p>
</sec>
</sec>
<sec id="s2-2">
<title>2.2 Data Sources and Dynamometer Test Cycles</title>
<p>Data was collected from two different sources: engine dynamometer testing and chassis dynamometer testing. A 6.7L heavy-duty bus engine (different model years) was tested on an engine and chassis dynamometer using multiple dynamometer test cycles representative of urban, highway, idle, transient and cold start conditions, thereby comprehensively encompassing known sources of transient <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions.</p>
<sec id="s2-2-1">
<title>2.2.1 Engine Dynamometer Data and Test Cycles</title>
<p>Engine dynamometer testing was conducted at the United&#x20;States EPA, National Vehicle and Fuel Emissions Laboratory (NVFEL), Ann Arbor, on a 6.7L heavy-duty engine (2010) using certification diesel fuel. Test data included engine parameters as well as after-treatment parameters as described in <xref ref-type="sec" rid="s2-1">Section 2.1</xref>. Both engine-out and tailpipe-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions were measured using exhaust gas analyzers at 10&#xa0;Hz frequency. All parameters measured using test cell instruments were also recorded at 10&#xa0;Hz frequency. The dynamometer test cycles included three separate runs of a cold Federal Test Procedure (FTP) cycle, a hot FTP cycle and a Ramped Mode Cycle (RMC) as shown in <xref ref-type="fig" rid="F1">Figures 1A,B</xref>. These tests encompass a variety of engine operating conditions including cold-start, hot-start and transient. This dataset will be referred to as Dataset 1 in this paper. Dataset 1 had a total of 127,223 samples for training the DNN after pre-processing.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Engine and Chassis dynamometer test cycles used to develop datasets for Deep Learning <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> Models. <bold>(A)</bold> Engine Dyno FTP Cycle. <bold>(B)</bold> Engine Dyno RMC Cycle. <bold>(C)</bold> Chassis Dyno Cold Start Super Cycle. <bold>(D)</bold> Chassis Dyno Hot Start Cycles. <bold>(E)</bold> Chassis Dyno On Road Cycle. <bold>(F)</bold> Chassis Dyno Ramp Cycle.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g001.tif"/>
</fig>
<p>In literature, data for testing DNN <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models was found to be split from the dataset in two different ways: keeping a separate run of a complete test cycle as test set (<xref ref-type="bibr" rid="B5">Arsie et&#x20;al., 2013</xref>; <xref ref-type="bibr" rid="B25">Fischer, 2013</xref>; <xref ref-type="bibr" rid="B61">Zhang et&#x20;al., 2015</xref>; <xref ref-type="bibr" rid="B9">Bellone et&#x20;al., 2020</xref>) or randomly selecting points from the dataset as test set (<xref ref-type="bibr" rid="B51">Shin et&#x20;al., 2020</xref>; <xref ref-type="bibr" rid="B38">Lee et&#x20;al., 2021</xref>; <xref ref-type="bibr" rid="B60">Yu et&#x20;al., 2021</xref>). In this study, both methods were adopted to analyze and compare the effect they had on the model performance. Subsequently, the dataset was first randomly shuffled and then split into train, validation and test sets. 75% of the data was used as train set, of which 25% was used as validation set. The remaining 25% of the total data was used to test the model after training and validation. Splitting of the dataset was also done by using two runs of each test cycle as train set and one run of each test cycle (unseen by the DNN models) as test&#x20;set.</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Chassis Dynamometer Data and Test Cycles</title>
<p>Chassis dynamometer testing was conducted on a hybrid bus that operates on a parallel hybrid architecture using a 6.7L heavy-duty engine (2011) and a 650V nickel-metal hydride battery using certification diesel fuel. The tests were conducted at the Heavy-Duty Chassis Dynamometer Test Facility at the United&#x20;States EPA NVFEL, Ann Arbor. The facility is capable of simulating on-road conditions for transient and loaded conditions with the help of a road speed modulated vehicle cooling fan with high precision. Continuous tailpipe exhaust measurements were made using a heated dilution tunnel and a Horiba MEXA-One gaseous emissions bench, while engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> measurements were sampled directly from the exhaust prior to the SCR, both at 10&#xa0;Hz frequency. A Cold Start Super Cycle (CSSC), a Hot Start Cycle which was a combination of New York Bus Cycle (NYBC), Orange County Bus Cycle (OCBC), NREL Transient Cycle, an On-Road bus cycle and a Ramp Cycle (<xref ref-type="fig" rid="F1">Figures 1C&#x2013;F</xref>) were successfully run on the chassis dynamometer to generate a comprehensive dataset encompassing different vehicle operating conditions. OBD data was collected from the bus using Controller Area Network (CAN) at 10&#xa0;Hz frequency. This included engine and after-treatment parameters similar to the engine dynamometer tests as described in <xref ref-type="sec" rid="s2-1">Section 2.1</xref>. Battery parameters including state of charge, charging and discharging current were also included as inputs to the engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model to incorporate the influence of hybridization on the <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions of the bus (<xref ref-type="bibr" rid="B7">Bagheri et&#x20;al., 2021</xref>). This dataset will be referred to as Dataset 2 in this&#x20;paper.</p>
<p>Dataset 2 had a total of 442,623 samples after pre-processing. As described for Dataset 1, Dataset 2 was also split randomly into equivalent train, validation and test set ratios. The data was also split using 16 runs of complete test cycles as train set and 3 separate runs of test cycles (unseen by the DNN models) as test set. Further details on the train, validation and test splits for both the datasets have been provided in <xref ref-type="table" rid="T1">Table&#x20;1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Description of datasets (engine and chassis dynamometer). Types of data splits (random and test cycles) and train/validation/test splits.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Dataset</th>
<th colspan="2" align="center">Engine dynamometer</th>
<th colspan="2" align="center">Chassis dynamometer</th>
</tr>
<tr>
<th align="left">Type of Data split</th>
<th align="center">Random</th>
<th align="center">Test cycles</th>
<th align="center">Random</th>
<th align="center">Test cycles</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Total Samples</td>
<td align="center">127,223</td>
<td align="center">127,223</td>
<td align="center">442,623</td>
<td align="center">442,623</td>
</tr>
<tr>
<td align="left">Train</td>
<td align="center">76,334</td>
<td align="center">64,883</td>
<td align="center">265,574</td>
<td align="center">301,569</td>
</tr>
<tr>
<td align="left">Validation</td>
<td align="center">19,083</td>
<td align="center">16,221</td>
<td align="center">66,393</td>
<td align="center">75,392</td>
</tr>
<tr>
<td align="left">Test</td>
<td align="center">31,806</td>
<td align="center">46,119</td>
<td align="center">110,656</td>
<td align="center">65,662</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
</sec>
<sec id="s3">
<title>3 Research Methodology</title>
<p>In this section, the research methodology followed in this paper is described. Specifically, we discuss the model and its associated hyperparameters and the data pre-processing strategy employed.</p>
<sec id="s3-1">
<title>3.1 Feed Forward Deep Neural Network</title>
<p>An Artificial Neural Network (ANN) makes use of representative data to establish empirical relationships between input features and some target output (<xref ref-type="bibr" rid="B37">LeCun et&#x20;al., 2015</xref>; <xref ref-type="bibr" rid="B29">Goodfellow et&#x20;al., 2016</xref>). ANNs have been shown to be adept at establishing complex relationships without the need of strict assumptions or mathematical equations, and as a result have had tremendous success in applications such as image classification (<xref ref-type="bibr" rid="B16">Ciregan et&#x20;al., 2012</xref>), speech recognition (<xref ref-type="bibr" rid="B4">Amodei et&#x20;al., 2016</xref>), and games (<xref ref-type="bibr" rid="B52">Silver et&#x20;al., 2016</xref>); see (<xref ref-type="bibr" rid="B11">Bishop, 1994</xref>; <xref ref-type="bibr" rid="B2">Abiodun et&#x20;al., 2018</xref>) for more examples. In its most primitive form, an ANN is a composition of connected neurons arranged in an input layer, possibly a set of hidden layers, and an output layer. Neurons in adjacent layers are connected through edges, or weights. Information flows from the input layer to the output layer through activation functions at each neuron that attempt to capture nonlinearity in the input-output relationship. This process is called &#x201c;Feed-Forward Propagation&#x201d;. Training an ANN amounts to adjusting the weights, commonly via the process of &#x201c;Back Propagation&#x201d; (<xref ref-type="bibr" rid="B49">Rumelhart et&#x20;al., 1986</xref>), in order to minimize the &#x201c;objective or loss&#x201d; function which measures the deviation between the true target output and the predicted output of the network. One Feed-Forward and one Back-Propagation constitute one training &#x201c;epoch&#x201d; of the&#x20;ANN.</p>
<p>Deep Learning Neural Networks (DNN) are a multi-layer manifestation of ANNs (i.e.,&#x20;more than one hidden layer). The predictive power of DNNs is positively correlated with the volume of data and the size of the network, i.e.,&#x20;both the width (number of neurons per layer) and the depth (number of layers) (<xref ref-type="bibr" rid="B56">Sun et&#x20;al., 2017</xref>). An important aspect of DNNs is their capability to learn good representations of complex phenomena using &#x201c;feature learning&#x201d; (<xref ref-type="bibr" rid="B10">Bengio, 2012</xref>). This enables DNNs to learn nonlinear mappings of input features to outputs by generating &#x201c;high level&#x201d; features using &#x201c;low level&#x201d; (input) training data. These intrinsic characteristics of DNNs make them suitable for modeling and predicting <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions using simple accessible&#x20;data.</p>
<p>In this study, DNNs are used in a supervised learning regression task, i.e.,&#x20;the training data has a labelled output and the output (<italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions) is continuous. The proposed DNNs are trained over multiple epochs to develop <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions models with high predictive power using the different training datasets (see <xref ref-type="sec" rid="s2-2">Section 2.2</xref>). The DNNs have multiple &#x201c;hyperparameters&#x201d; (e.g., number of hidden layers and nodes, batch size, learning rate) that determine the structure of the neural network and guide the learning process. These parameters are tuned, using exhaustive grid searches (<xref ref-type="bibr" rid="B24">Feurer and Hutter, 2019</xref>), to give the best possible performance for a given model, dataset, and computational budget. Prior to training the DNNs, the available datasets are pre-processed in order to help with the training (learning), and as a result increase the predictive power of the DNNs for the given computational budget (<xref ref-type="bibr" rid="B60">Yu et&#x20;al., 2021</xref>); see <xref ref-type="sec" rid="s3-2">Section 3.2</xref> for more details. <xref ref-type="fig" rid="F2">Figure&#x20;2</xref> shows the schematic for the DNN engine-out (2a) and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> (2b) models to describe the DNN architecture.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Deep neural network model architecture, inputs/outputs, and activation functions. <bold>(A)</bold> Engine Out NOx Model. <bold>(B)</bold> Tailpipe NOx Model.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g002.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>3.2 Data Pre-Processing</title>
<p>Transient data was collected from the engine and chassis dynamometer testing, therefore, it is necessary to eliminate time delay between the input parameters and the measured <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions before training the DNN models to improve model performance (<xref ref-type="bibr" rid="B5">Arsie et&#x20;al., 2013</xref>; <xref ref-type="bibr" rid="B25">Fischer, 2013</xref>; <xref ref-type="bibr" rid="B31">Johri and Filipi, 2014</xref>; <xref ref-type="bibr" rid="B61">Zhang et&#x20;al., 2015</xref>). For the engine dynamometer data, the dataset was time-aligned according to 40 Code of Federal Regulations (CFR) Part 1065 to account for delays in exhaust gas transport and instrument responses (<xref ref-type="bibr" rid="B21">EPA, 2021a</xref>). An empirical time constant was derived by cross-correlating the input parameters with the measured <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions for the chassis dynamometer data for time-alignment of the complete dataset. The effectiveness of the time alignment method was demonstrated with a Pearson&#x2019;s correlation coefficient of&#x20;0.99.</p>
<p>Further, any data points that had negative values due to instrument or calibration errors were removed. This ensures that noisy or unreasonable data does not contaminate the DNN models. The application of DNN to instantaneous <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction is difficult as it involves the prediction of a continuous variable at every point. To mitigate this issue several studies in the literature have used box plots or median methods to determine and remove &#x201c;outliers&#x201d; in the data to improve network performance (<xref ref-type="bibr" rid="B19">Donateo and Filomena, 2020</xref>; <xref ref-type="bibr" rid="B60">Yu et&#x20;al., 2021</xref>). In this study, it was found that eliminating outliers also removed a large number of &#x201c;peak&#x201d; <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> conditions which are significant in predicting different transient and &#x201c;rare&#x201d; events that occur during vehicle operations. Thus, in order to promote robustness in the DNN models, in this study, data associated with outliers (e.g., &#x201c;peak&#x201d; <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> conditions) was included in the training.</p>
<p>Each dataset was divided into train, validation and test sets as described in <xref ref-type="sec" rid="s2-2">Section 2.2</xref>. The train set is used to train the models. The validation set provides an unbiased evaluation of the model&#x2019;s fit on the training samples while tuning the different hyper-parameters of DNN models. The test set is used to evaluate the final model to determine model accuracy and generalization capability.</p>
<p>Feature Scaling is an essential step in data pre-processing for DNN models. The basis for feature scaling is to transform the data such that all the inputs have similar distributions, i.e.,&#x20;a common scale, and equal importance is given to each variable ensuring that no variable influences the model solely due to magnitude (<xref ref-type="bibr" rid="B35">Kotsiantis et&#x20;al., 2006</xref>). Scaling the features helps with the stability, efficiency and robustness of gradient-based optimization algorithms (<xref ref-type="bibr" rid="B57">Wan, 2019</xref>). In this study, normalization was used which shifts and rescales the input values to a range between 0 and 1 (also known as min-max scaling) given by:<disp-formula id="equ2">
<mml:math id="m7">
<mml:mtable class="align-star" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>norm</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>max</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
</p>
<p>In all the datasets, min-max scaling was fit on the train set and then used to normalize both the validation and test set. This was done to ensure unbiased testing predictions.</p>
</sec>
<sec id="s3-3">
<title>3.3 Architecture of DNN Models</title>
<p>An Intel<sup>&#xae;</sup> Core<sup>TM</sup> i7-10750H CPU @ 2.60&#xa0;GHZ (12 cores) with 16&#xa0;GB RAM and an NVIDIA GeForce RTX 2060 GPU were used for computation. Python 3.60 programming language was used to develop the model with the help of &#x201c;Keras&#x201d; deep learning library using TensorFlow (<xref ref-type="bibr" rid="B1">Abadi et&#x20;al., 2016</xref>) as backend. The Python library &#x201c;scikit-learn&#x201d; (<xref ref-type="bibr" rid="B46">Pedregosa et&#x20;al., 2011</xref>) was used for pre-processing data, dataset splitting, hyperparameter grid search and model evaluation using different metrics.</p>
<sec id="s3-3-1">
<title>3.3.1 Hyperparameter Selection and Optimization</title>
<p>In this study, hyperparameter optimization, a very important ingredient of DNN training (<xref ref-type="bibr" rid="B24">Feurer and Hutter, 2019</xref>), is implemented using a grid search where the search space is defined by a grid of hyperparameter values. Every point in the grid which represents a model configuration is then evaluated for performance using appropriate evaluation metrics. Grid search was performed using the GridSearchCV function in the &#x201c;scikit-learn&#x201d; library, which allows one to perform cross-validation (<xref ref-type="bibr" rid="B48">Refaeilzadeh et&#x20;al., 2009</xref>) in order to understand the generalization capability of each model configuration being tested. The target hyperparameters for optimization included the number of hidden layers, number of nodes in each hidden layer, learning rate and batch size. The ranges of hyperparameters considered for the two datasets are given in <xref ref-type="table" rid="T2">Table&#x20;2</xref>. <xref ref-type="table" rid="T3">Table&#x20;3</xref> summarizes the final architecture and hyperparameters for each&#x20;model.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Ranges of hyperparameters explored for different models (Engine Dynamometer and Chassis Dynamometer).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Dataset</th>
<th colspan="2" align="center">Engine dynamometer</th>
<th colspan="2" align="center">Chassis dynamometer</th>
</tr>
<tr>
<th align="left">Model</th>
<th align="center">Engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
<th align="center">Tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
<th align="center">Engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
<th align="center">Tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Learning Rate</td>
<td align="center">[0.01,0.001,0.0001]</td>
<td align="center">[0.01,0.001,0.0001]</td>
<td align="center">[0.01,0.001,0.0001]</td>
<td align="center">[0.01,0.001,0.0001]</td>
</tr>
<tr>
<td align="left">Batch Size</td>
<td align="center">[100, 500, 1,000, 95,417]</td>
<td align="center">[100, 500, 1,000, 95,417]</td>
<td align="center">[1,000, 5,000, 10,000, 331,967]</td>
<td align="center">[1,000, 5,000, 10,000, 331,967]</td>
</tr>
<tr>
<td align="left">Input Layer Nodes</td>
<td align="center">8</td>
<td align="center">5</td>
<td align="center">9</td>
<td align="center">5</td>
</tr>
<tr>
<td align="left">Hidden Layers</td>
<td align="center">[2,3,4,5,6]</td>
<td align="center">[2,3,4,5,6]</td>
<td align="center">[2,3,4,5,6]</td>
<td align="center">[2,3,4,5,6]</td>
</tr>
<tr>
<td align="left">First Hidden Layer Nodes</td>
<td align="center">[200,100,50,20]</td>
<td align="center">[200,100,50,20]</td>
<td align="center">[200,100,50,20]</td>
<td align="center">[200,100,50,20]</td>
</tr>
<tr>
<td align="left">Last Hidden Layer Nodes</td>
<td align="center">[20,15,10,5]</td>
<td align="center">[20,15,10,5]</td>
<td align="center">[20,15,10,5]</td>
<td align="center">[20,15,10,5]</td>
</tr>
<tr>
<td align="left">Hidden Layer Activation Function</td>
<td align="center">ReLU</td>
<td align="center">ReLU</td>
<td align="center">ReLU</td>
<td align="center">ReLU</td>
</tr>
<tr>
<td align="left">Output Layer Nodes</td>
<td align="center">1</td>
<td align="center">1</td>
<td align="center">1</td>
<td align="center">1</td>
</tr>
<tr>
<td align="left">Output Layer Activation Function</td>
<td align="center">ReLU</td>
<td align="center">ReLU</td>
<td align="center">ReLU</td>
<td align="center">ReLU</td>
</tr>
<tr>
<td align="left">Epochs</td>
<td align="center">200</td>
<td align="center">200</td>
<td align="center">200</td>
<td align="center">200</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Final optimal hyperparameters for engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models for dataset 1 and 2.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Dataset</th>
<th colspan="2" align="center">Engine dynamometer</th>
<th colspan="2" align="center">Chassis dynamometer</th>
</tr>
<tr>
<th align="left">Model</th>
<th align="center">Engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
<th align="center">Tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
<th align="center">Engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
<th align="center">Tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Input Layer Nodes</td>
<td align="center">8</td>
<td align="center">5</td>
<td align="center">9</td>
<td align="center">5</td>
</tr>
<tr>
<td align="left">Hidden Layer Nodes</td>
<td align="center">[200, 100, 50, 5]</td>
<td align="center">[1,000, 500, 250, 100, 5]</td>
<td align="center">[1,000, 500, 250, 100, 5]</td>
<td align="center">[2,000, 1,000, 500, 250, 100, 5]</td>
</tr>
<tr>
<td align="left">Hidden Layer Activation Function</td>
<td align="center">ReLU</td>
<td align="center">ReLU</td>
<td align="center">ReLU</td>
<td align="center">ReLU</td>
</tr>
<tr>
<td align="left">Output Nodes</td>
<td align="center">1</td>
<td align="center">1</td>
<td align="center">1</td>
<td align="center">1</td>
</tr>
<tr>
<td align="left">Output Layer Activation Function</td>
<td align="center">ReLU</td>
<td align="center">LeakyReLU</td>
<td align="center">ReLU</td>
<td align="center">LeakyReLU</td>
</tr>
<tr>
<td align="left">Learning Rate</td>
<td align="center">0.001</td>
<td align="center">0.001</td>
<td align="center">0.001</td>
<td align="center">0.001</td>
</tr>
<tr>
<td align="left">Learning Rate Decay</td>
<td align="center">1/5 every 200 Epochs</td>
<td align="center">1/5 every 200 Epochs</td>
<td align="center">1/10 every 400 Epochs</td>
<td align="center">1/10 every 400 Epochs</td>
</tr>
<tr>
<td align="left">Drop Out</td>
<td align="center">0</td>
<td align="center">0.1</td>
<td align="center">0.1</td>
<td align="center">0.1</td>
</tr>
<tr>
<td align="left">Batch Size</td>
<td align="center">500</td>
<td align="center">500</td>
<td align="center">1,000</td>
<td align="center">1,000</td>
</tr>
<tr>
<td align="left">Epochs</td>
<td align="center">600</td>
<td align="center">600</td>
<td align="center">1,000</td>
<td align="center">1,000</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>For the hyperparameter optimization, initially a small DNN network composed of two hidden layers with 20 and 10 nodes, respectively, was set up to reduce computational burden. Some hyperparameters were selected based on optimal values in literature that used DNNs for similar supervised learning regression tasks. The Adam optimizer (<xref ref-type="bibr" rid="B33">Kingma and Ba, 2014</xref>), a stochastic first-order diagonally scaled method, was used as the optimization algorithm. Associated with the optimization algorithm, the learning rate (a parameter that controls the change in the DNN model weights), as well as the batch size (the amount of data used in the Forward- and Backward-Propagations) were also tuned. With respect to the DNN model, the Rectified Linear Unit (ReLU) activation function (<xref ref-type="bibr" rid="B44">Nair and Hinton, 2010</xref>) was considered appropriate for both the hidden layers and the output layer. ReLU is a piece-wise linear function which outputs the input itself if it is positive, but outputs zero if it is negative. In this application all input values are non-negative, thus, ReLU is an appropriate candidate for the hidden layers.</p>
<p>Moreover, the number of hidden layers and nodes in each hidden layer were optimized using a separate grid search using the optimized learning rate and batch size from the first grid search. The different DNN hidden layer configurations for the grid search were developed using a custom function that utilizes three parameters as inputs to create different DNN hidden layer configurations; number of hidden layers, number of neurons in the first hidden layer and number of neurons in the last hidden layer. Based on the number of hidden layers selected, the function individually selects the number of neurons for first and last hidden layers from the ranges provided in <xref ref-type="table" rid="T2">Table&#x20;2</xref> and linearly decreases the number of neurons in each layer based on the number of hidden layers. For example, if we consider 5 hidden layers, 200 neurons in the first hidden layer and 20 neurons in the last hidden layer, a DNN hidden layer configuration given by (200,155,110,65,20) is created by the function. Each network in both the grid searches was run for 200 epochs with 5 fold cross-validation (<xref ref-type="bibr" rid="B55">Stone, 1978</xref>) to determine the optimal batch size, learning rate and hidden layer configuration based on the optimal mean MSE on the cross-validation tests. Further tests were then carried out on the optimal network architecture determined to evaluate the effect of increasing the width of the network (i.e.,&#x20;number of nodes in each hidden layer) and number of epochs on the performance of the model. The number of epochs for training the network was optimized based on the training and validation loss curves to ensure that there was no overfitting of the model on the train set. Number of epochs was used as a termination criteria for the training of all the models. The optimal hyperparameters based on the results from the grid searches have been summarized in <xref ref-type="table" rid="T3">Table&#x20;3</xref>.</p>
<sec id="s3-3-1-1">
<title>Learning Rate Decay</title>
<p>Learning rate decay is a mechanism by which the learning rate (employed by the optimization algorithm) is set and adjusted as the optimization progresses to help with learning. Specifically, during the early stages of training, large learning rates are employed to allow for large steps, and in order to avoid spurious local minima. In the latter stages of training, a smaller, more refined learning rate is employed in order to obviate the effects of noise, and in order to converge (to a local minimum). Learning rate decay has been shown empirically to improve model optimization and generalization (<xref ref-type="bibr" rid="B53">Smith, 2018</xref>; <xref ref-type="bibr" rid="B26">Ge et&#x20;al., 2019</xref>; <xref ref-type="bibr" rid="B59">You et&#x20;al., 2019</xref>). In this study, learning rate decay was used for all the models.</p>
</sec>
<sec id="s3-3-1-2">
<title>Dropout</title>
<p>Overparametrized DNNs, i.e.,&#x20;DNNs with a large number of parameters (weights), are prone to overfitting to the training dataset (<xref ref-type="bibr" rid="B50">Sankararaman et&#x20;al., 2020</xref>). Dropout is a regularization technique often employed in DNN training to mitigate this issue (<xref ref-type="bibr" rid="B54">Srivastava et&#x20;al., 2014</xref>). The technique involves randomly &#x201c;dropping out&#x201d; nodes along with their connections from the network during training. Temporarily &#x201c;deactivating&#x201d; nodes during training reduces the over-adaptation of the network weights to the training data and leads to improvement in network out-of-sample performance and generalization (<xref ref-type="bibr" rid="B8">Baldi and Sadowski, 2013</xref>). The fraction of nodes that are deactivated at every iteration of training (&#x201c;dropout rate&#x201d;) is often treated as a hyperparameter. In this study, a small dropout rate of 0.1 was applied to the hidden layers of all models during training except for the engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model for Dataset&#x20;1.</p>
</sec>
</sec>
<sec id="s3-3-2">
<title>3.3.2 Loss Function and Evaluation Metrics</title>
<p>In this study, the following loss function and evaluation metrics were&#x20;used.</p>
<sec id="s3-3-2-1">
<title>Mean Squared Error (MSE)</title>
<p>Mean Squared Error (MSE) is the default loss function used in many DNNs for regression problems. It is calculated via<disp-formula id="equ3">
<mml:math id="m8">
<mml:mtable class="align-star" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>and is the average of the squared difference between the true output value (y<sub>i</sub>) and the model&#x2019;s predicted value <inline-formula id="inf1">
<mml:math id="m4">
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, also referred to as the prediction&#x20;error.</p>
</sec>
<sec id="s3-3-2-2">
<title>R-Squared (<italic>R</italic>
<sup>2</sup>)</title>
<p>Coefficient of determination or R-Squared (<italic>R</italic>
<sup>2</sup>), defined as<disp-formula id="equ4">
<mml:math id="m9">
<mml:mtable class="align-star" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msup>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>where <inline-formula id="inf2">
<mml:math id="m10">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> is the mean of the true output value, is a statistical measure used to determine the &#x201c;goodness of fit&#x201d;, i.e.,&#x20;how well the predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> values fit with the true <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> values. Using <italic>R</italic>
<sup>2</sup> as an evaluation metric indicates the model performance across different points in the <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> distribution. The higher the <italic>R</italic>
<sup>2</sup> value, the better the model prediction across the <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> distribution.</p>
</sec>
<sec id="s3-3-2-3">
<title>Total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> Error</title>
<p>Another evaluation metric that has been used in the literature to measure the predictive accuracy of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions models is Total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error (%) (<xref ref-type="bibr" rid="B25">Fischer, 2013</xref>; <xref ref-type="bibr" rid="B31">Johri and Filipi, 2014</xref>; <xref ref-type="bibr" rid="B9">Bellone et&#x20;al., 2020</xref>). Total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error, defined as<disp-formula id="equ5">
<mml:math id="m11">
<mml:mtable class="align-star" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mi>T</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.28em"/>
<mml:mi>E</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>N</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>N</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>N</mml:mi>
<mml:mi>O</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2a;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula> is the percent difference between total true and predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions in the dataset. Total cumulative <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> is calculated for train, validation and test datasets for both true and predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> values. The percent difference between total true and predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> over each dataset shows if the model has captured various transient <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> conditions (both high and low) across the dataset effectively and the percentage of total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions captured by the model over the dataset.</p>
</sec>
<sec id="s3-3-2-4">
<title>Instantaneous <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error</title>
<p>Since the DNN models developed in this study are used to predict instantaneous <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions, we propose a novel instantaneous <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error metric that captures the error at every point in the training (validation, testing) set. The absolute prediction error and percent absolute prediction error (%) for every training point is calculated at every epoch of training and is given by<disp-formula id="equ6">
<mml:math id="m12">
<mml:mtable class="align-star" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mi>A</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>E</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mi>A</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>E</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.28em"/>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2a;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
</p>
<p>The maximum, minimum and mean absolute prediction error and percent error is then calculated and reported for each epoch over the training set. As the DNN learns to predict <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions at every instant (data point) in the training set, these error metrics should reduce indicating improvement in the model&#x2019;s instantaneous <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction capability.</p>
</sec>
</sec>
<sec id="s3-3-3">
<title>3.3.3 Output Layer Activation Function for Tailpipe NOx Model</title>
<p>Both engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models have 1 node in the output layer as the models are predicting one continuous non-negative output (engine-out or tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions). ReLU was considered as an appropriate candidate for the output layer for both models. However, an interesting phenomenon was observed while examining model predictions for tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>. Namely, the model predicted tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> as &#x201c;0&#x201d; for many true labels where the <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions were smaller than 0.0001&#xa0;g/s (70% of the training set). This resulted in underprediction of total cycle <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions since many non-zero values were predicted as zero. Extracting the input values to the output layer ReLU activation function showed negative values for smaller true <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> labels (<inline-formula id="inf3">
<mml:math id="m13">
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.000</mml:mn>
<mml:mspace width="0.17em"/>
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula> g/s), which by the nature of ReLU are output as &#x201c;0&#x201d;, thereby increasing total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction error. Thus, other activation functions like LeakyReLU (<xref ref-type="bibr" rid="B39">Maas et&#x20;al., 2013</xref>) and Exponential Linear Unit (ELU) (<xref ref-type="bibr" rid="B17">Clevert et&#x20;al., 2015</xref>) were tested for the output layer. These activation functions are similar to the ReLU activation function with minor differences (e.g., LeakyReLU has a small slope in the negative region; nonzero gradient when the node is not active). It was found that using LeakyReLU improved the tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model predictions at lower <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> values at the expense of predicting some negative <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> values (<inline-formula id="inf4">
<mml:math id="m14">
<mml:mo>&#x3c;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mi>%</mml:mi>
</mml:math>
</inline-formula> of the dataset). The small negative slope helps to smoothen the hard threshold that ReLU has to output &#x201c;0&#x201d; value and therefore helps the models predict better at lower <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> values. LeakyReLU only outputs negative values for very small <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> true labels (<inline-formula id="inf5">
<mml:math id="m15">
<mml:mo>&#x3c;</mml:mo>
<mml:mn>1</mml:mn>
<mml:msup>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> g/s). These negative values are negligible when compared to the total training set <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> predicted <inline-formula id="inf6">
<mml:math id="m16">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.01</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, thereby improving overall tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> DNN model performance.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<title>4 Results</title>
<p>In this section, the main results for the engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models are presented. <xref ref-type="table" rid="T3">Table&#x20;3</xref> summarizes the final architecture and hyperparameters for each model. The results are divided into the following sub-sections: model evaluation metrics (<xref ref-type="sec" rid="s4-1">Section 4.1</xref>), error metrics (<xref ref-type="sec" rid="s4-2">Section 4.2</xref>), and instantaneous actual vs predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions (<xref ref-type="sec" rid="s4-3">Section 4.3</xref>), on the train, validation and test sets for each model and dataset.</p>
<sec id="s4-1">
<title>4.1 Model Evaluation Metrics Results</title>
<p>The evaluation metrics used to measure the prediction accuracy and overall performance of the models on the train, validation and test sets are reported in <xref ref-type="table" rid="T4">Table&#x20;4</xref>. Cross-validation (5 fold), a popular tool used in machine learning to evaluate a model&#x2019;s prediction and generalization capability (<xref ref-type="bibr" rid="B48">Refaeilzadeh et&#x20;al., 2009</xref>), was employed in this study. The results presented in <xref ref-type="table" rid="T4">Table&#x20;4</xref> are the average MSE, MAE, <italic>R</italic>
<sup>2</sup> and MAE (%) over the maximum <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> values over the 5 fold cross-validation for each model along with their respective 95% confidence intervals. Overall, the confidence interval values reported in <xref ref-type="table" rid="T4">Table&#x20;4</xref> indicate that the models were robust to changes in training inputs.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Evaluation metrics for train, validation and test set with 95% confidence intervals (all models).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Dataset</th>
<th colspan="2" align="center">Engine dynamometer</th>
<th colspan="2" align="center">Chassis dynamometer</th>
</tr>
<tr>
<th align="left">Model</th>
<th align="center">Engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
<th align="center">Tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
<th align="center">Engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
<th align="center">Tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Train MSE (g/s)</td>
<td align="center">5.02E-06&#x20;&#xb1; 3.03E-07</td>
<td align="center">7.79E-07&#x20;&#xb1; 2.14E-07</td>
<td align="center">1.91E-05&#x20;&#xb1; 3.23E-07</td>
<td align="center">4.21E-05&#x20;&#xb1; 2.07E-06</td>
</tr>
<tr>
<td align="left">Val MSE (g/s)</td>
<td align="center">7.35E-06&#x20;&#xb1; 2.47E-07</td>
<td align="center">1.27E-06&#x20;&#xb1; 3.29E-07</td>
<td align="center">4.30E-05&#x20;&#xb1; 8.60E-07</td>
<td align="center">7.20E-05&#x20;&#xb1; 2.70E-06</td>
</tr>
<tr>
<td align="left">Test MSE (g/s)</td>
<td align="center">7.41E-06&#x20;&#xb1; 1.76E-07</td>
<td align="center">1.43E-06&#x20;&#xb1; 3.37E-06</td>
<td align="center">4.19E-05&#x20;&#xb1; 4.86E-07</td>
<td align="center">7.07E-05&#x20;&#xb1; 9.22E-07</td>
</tr>
<tr>
<td align="left">Train MAE (g/s)</td>
<td align="center">1.22E-03&#x20;&#xb1; 2.41E-05</td>
<td align="center">4.30E-04&#x20;&#xb1; 1.36E-04</td>
<td align="center">2.48E-03&#x20;&#xb1; 2.61E-05</td>
<td align="center">3.18E-03&#x20;&#xb1; 3.56E-05</td>
</tr>
<tr>
<td align="left">Val MAE (g/s)</td>
<td align="center">1.25E-03&#x20;&#xb1; 1.43E-05</td>
<td align="center">4.44E-04&#x20;&#xb1; 1.32E-04</td>
<td align="center">3.25E-03&#x20;&#xb1; 2.04E-05</td>
<td align="center">3.94E-03&#x20;&#xb1; 7.02E-05</td>
</tr>
<tr>
<td align="left">Test MAE (g/s)</td>
<td align="center">1.34E-03&#x20;&#xb1; 1.77E-05</td>
<td align="center">4.51E-04&#x20;&#xb1; 1.35E-04</td>
<td align="center">3.27E-03&#x20;&#xb1; 2.20E-05</td>
<td align="center">3.91E-03&#x20;&#xb1; 3.14E-05</td>
</tr>
<tr>
<td align="left">Train <italic>R</italic>
<sup>2</sup>
</td>
<td align="center">0.998&#x20;&#xb1; 0.001</td>
<td align="center">0.996&#x20;&#xb1; 0.001</td>
<td align="center">0.987&#x20;&#xb1; 0.001</td>
<td align="center">0.956&#x20;&#xb1; 0.002</td>
</tr>
<tr>
<td align="left">Val <italic>R</italic>
<sup>2</sup>
</td>
<td align="center">0.997&#x20;&#xb1; 0.001</td>
<td align="center">0.995&#x20;&#xb1; 0.001</td>
<td align="center">0.971&#x20;&#xb1; 0.001</td>
<td align="center">0.926&#x20;&#xb1; 0.003</td>
</tr>
<tr>
<td align="left">Test <italic>R</italic>
<sup>2</sup>
</td>
<td align="center">0.997&#x20;&#xb1; 0.001</td>
<td align="center">0.994&#x20;&#xb1; 0.001</td>
<td align="center">0.972&#x20;&#xb1; 0.001</td>
<td align="center">0.927&#x20;&#xb1; 0.001</td>
</tr>
<tr>
<td align="left">Train MAE (%)</td>
<td align="center">0.512&#x20;&#xb1; 0.010</td>
<td align="center">0.080&#x20;&#xb1; 0.025</td>
<td align="center">0.516&#x20;&#xb1; 0.021</td>
<td align="center">1.487&#x20;&#xb1; 0.040</td>
</tr>
<tr>
<td align="left">Val MAE (%)</td>
<td align="center">0.566&#x20;&#xb1; 0.006</td>
<td align="center">0.084&#x20;&#xb1; 0.025</td>
<td align="center">0.950&#x20;&#xb1; 0.020</td>
<td align="center">2.137&#x20;&#xb1; 0.222</td>
</tr>
<tr>
<td align="left">Test MAE (%)</td>
<td align="center">0.566&#x20;&#xb1; 0.006</td>
<td align="center">0.085&#x20;&#xb1; 0.025</td>
<td align="center">0.883&#x20;&#xb1; 0.028</td>
<td align="center">1.797&#x20;&#xb1; 0.020</td>
</tr>
<tr>
<td align="left">Train Total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> Error (%)</td>
<td align="center">0.086&#x20;&#xb1; 0.049</td>
<td align="center">1.229&#x20;&#xb1; 1.250</td>
<td align="center">0.116&#x20;&#xb1; 0.114</td>
<td align="center">0.151&#x20;&#xb1; 0.231</td>
</tr>
<tr>
<td align="left">Val Total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> Error (%)</td>
<td align="center">0.100&#x20;&#xb1; 0.058</td>
<td align="center">1.304&#x20;&#xb1; 1.375</td>
<td align="center">0.155&#x20;&#xb1; 0.114</td>
<td align="center">0.368&#x20;&#xb1; 0.350</td>
</tr>
<tr>
<td align="left">Test Total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> Error (%)</td>
<td align="center">0.084&#x20;&#xb1; 0.059</td>
<td align="center">1.214&#x20;&#xb1; 1.313</td>
<td align="center">0.167&#x20;&#xb1; 0.109</td>
<td align="center">0.249&#x20;&#xb1; 0.195</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The selection of evaluation metrics was directed at analyzing different aspects of the model&#x2019;s <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction capability. First, MSE and MAE were chosen to evaluate if the models were generalizing well to unseen data, i.e.,&#x20;validation and test sets. For all the models, the training and validation MSE and MAE values are very close indicating good model generalization. The test and validation set MSE and MAE values are comparable indicating model robustness to predicting <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> for conditions unseen by the model when training.</p>
<p>One of the holistic metrics used in the literature to capture <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions model accuracy is <italic>R</italic>
<sup>2</sup> (<xref ref-type="bibr" rid="B5">Arsie et&#x20;al., 2013</xref>; <xref ref-type="bibr" rid="B9">Bellone et&#x20;al., 2020</xref>; <xref ref-type="bibr" rid="B51">Shin et&#x20;al., 2020</xref>; <xref ref-type="bibr" rid="B60">Yu et&#x20;al., 2021</xref>). As can be seen from <xref ref-type="table" rid="T4">Table&#x20;4</xref>, <italic>R</italic>
<sup>2</sup> values on train, validation and test sets are high for both engine-out and tailpipe models for Dataset 1 and engine-out model for Dataset 2 even with the inclusion of peak <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> conditions (<xref ref-type="bibr" rid="B60">Yu et&#x20;al., 2021</xref>). Contrary to (<xref ref-type="bibr" rid="B5">Arsie et&#x20;al., 2013</xref>; <xref ref-type="bibr" rid="B61">Zhang et&#x20;al., 2015</xref>; <xref ref-type="bibr" rid="B9">Bellone et&#x20;al., 2020</xref>; <xref ref-type="bibr" rid="B51">Shin et&#x20;al., 2020</xref>) that used ECU variables which are comprehensive but not easily accessible, the models developed in this study achieved comparably high model accuracy while utilizing only simple OBD parameters as inputs to the models. The aftertreatment system on the bus used to collect data for Dataset 2 has been subject to a product recall due to a manufacturing defect (<xref ref-type="bibr" rid="B20">EPA, 2018</xref>). Therefore, the data is not completely representative of a working SCR system and subsequently the production of tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. This could explain the relatively lower <italic>R</italic>
<sup>2</sup> values for the tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model for this dataset. However, the model still captured a significant portion of the transient tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions as indicated by Test <italic>R</italic>
<sup>2</sup> of 0.9275 shown in <xref ref-type="table" rid="T4">Table&#x20;4</xref>.</p>
<p>Linearity of the models was evaluated using mean absolute error (percent) with respect to the maximum <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> in the dataset. As can be seen from <xref ref-type="table" rid="T4">Table&#x20;4</xref>, the percent MAE with respect to the maximum <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> for all the models are well within 1&#x2013;2%, which is comparable to the <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> measurement accuracy of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions analyzers which have linearity of 1% of full-scale <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> (<xref ref-type="bibr" rid="B28">Gluck et&#x20;al., 2003</xref>).</p>
<p>
<xref ref-type="fig" rid="F3">Figure&#x20;3</xref> presents the training and validation MSE (loss) and <italic>R</italic>
<sup>2</sup> curves for each model. These curves help to visualize the progress of the MSE function and <italic>R</italic>
<sup>2</sup> as the network is trained over a set number of epochs. The training loss over the epochs shows how well the model is learning using the given dataset, while the validation loss shows how well the model is generalizing to a smaller validation set that is not being used to train the DNN. For all the models, it can be seen that MSE decreases as the network learns, while <italic>R</italic>
<sup>2</sup> increases which indicates improvement in model prediction. The sudden drops in the loss curves indicate where the learning rate decay was implemented for each&#x20;model.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Evolution of MSE and <italic>R</italic>
<sup>2</sup> curves over training and validation data (All Models). <bold>(A)</bold> Engine Out NOx Loss and <italic>R</italic>
<sup>2</sup> Dataset 1. <bold>(B)</bold> Tailpipe NOx Loss and <italic>R</italic>
<sup>2</sup> Dataset 1. <bold>(C)</bold> Engine out NOx Loss &#x26; <italic>R</italic>
<sup>2</sup> Dataset 2. <bold>(D)</bold> Tailpipe NOx Loss and <italic>R</italic>
<sup>2</sup> Dataset 2.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g003.tif"/>
</fig>
<p>The results of Regression Analysis (<italic>R</italic>
<sup>2</sup> fit) for train and test datasets of the different models are shown in <xref ref-type="fig" rid="F4">Figure&#x20;4</xref>. The models show high degrees of agreement with the measured engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions demonstrated by high <italic>R</italic>
<sup>2</sup> values on both train and test sets. The points are well distributed on both sides of the regression fit line which indicates normal distribution of prediction errors with a mean around 0, which is further confirmed by the histogram of errors shown for all the models. The large scatter of points on either side of the regression fit line in <xref ref-type="fig" rid="F4">Figures 4G,H</xref> can be attributed to the large number of data points which causes the &#x201c;few&#x201d; outliers in the plot to cover a larger region in the&#x20;plots.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Train and Test <italic>R</italic>
<sup>2</sup> fits with histograms showing distribution of errors (All Models). <bold>(A)</bold>: Engine Out NOx Model <italic>R</italic>
<sup>2</sup> Train Dataset 1. <bold>(B)</bold>: Engine Out NOx Model <italic>R</italic>
<sup>2</sup> Test Dataset 1. <bold>(C)</bold>: Engine Out NOx Model <italic>R</italic>
<sup>2</sup> Train Dataset 2. <bold>(D)</bold>: Engine Out NOx Model <italic>R</italic>
<sup>2</sup> Test Dataset 2. <bold>(E)</bold>: Tailpipe NOx Model <italic>R</italic>
<sup>2</sup> Train Dataset 1. <bold>(F)</bold>: Tailpipe NOx Model <italic>R</italic>
<sup>2</sup> Test Dataset 1. <bold>(G)</bold>: Tailpipe NOx Model <italic>R</italic>
<sup>2</sup> Train Dataset 2. <bold>(H)</bold>: Tailpipe NOx Model <italic>R</italic>
<sup>2</sup> Test Dataset 2.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g004.tif"/>
</fig>
</sec>
<sec id="s4-2">
<title>4.2 Error Metrics</title>
<p>
<xref ref-type="fig" rid="F5">Figure&#x20;5</xref> shows the total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error (%) (<xref ref-type="bibr" rid="B5">Arsie et&#x20;al., 2013</xref>; <xref ref-type="bibr" rid="B31">Johri and Filipi, 2014</xref>; <xref ref-type="bibr" rid="B9">Bellone et&#x20;al., 2020</xref>) for train and validation sets (orange line) on a log scale. The error was consistent on average over the training. From <xref ref-type="table" rid="T4">Table&#x20;4</xref> it can be observed that total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error (%) for lower accuracy models (<italic>R</italic>
<sup>2</sup> &#x3d; 0.93&#x2013;0.95) is lower than that of higher accuracy models (<italic>R</italic>
<sup>2</sup> &#x3d; 0.99). A simple reason for this could be due to the fact that the lower accuracy models overpredict and underpredict instantaneous <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> more than the higher accuracy models. The results from <xref ref-type="table" rid="T4">Table&#x20;4</xref> show that there is no clear correlation between <italic>R</italic>
<sup>2</sup> values and the total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error, i.e.,&#x20;higher <italic>R</italic>
<sup>2</sup> models do not show lower total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error. The studies conducted on the models indicate that the magnitude of overprediction and underprediction in lower <italic>R</italic>
<sup>2</sup> models sufficiently balances out when the cumulative total true and predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> over the train, validation and test set is calculated, resulting in a lower total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error (see histograms in <xref ref-type="fig" rid="F4">Figure&#x20;4</xref>). This suggests that the total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error metric alone is not a good indicator of a model&#x2019;s instantaneous <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> predictive capability.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Progression of total <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> error in comparison with maximum, minimum and mean absolute error over training of all DNN <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models. <bold>(A)</bold> Engine Out NOx Train Errors (%) Dataset 1. <bold>(B)</bold> Tailpipe NOx Train Errors (%) Dataset 1. <bold>(C)</bold> Engine Out NOx Train Errors (%) Dataset 2. <bold>(D)</bold> Tailpipe NOx Train Errors (%) Dataset 2. <bold>(E)</bold> Engine Out NOx Train Errors Dataset 1. <bold>(F)</bold> Tailpipe NOx Train Errors Dataset 1. <bold>(G)</bold> Engine Out NOx Train Errors Dataset 2. <bold>(H)</bold> Tailpipe NOx Train Errors Dataset 2.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g005.tif"/>
</fig>
<p>Therefore, in order to better understand the progression of the instantaneous prediction error at every epoch, the (maximum, minimum and mean) absolute prediction error and percent absolute prediction error over the train set were used as additional evaluation merits; see <xref ref-type="sec" rid="s3-3-2">Section 3.3.2</xref> for definition. From <xref ref-type="fig" rid="F5">Figure&#x20;5</xref>, it can be seen that all metrics decrease as the model is trained, indicating improvement in model accuracy. The large maximum percent absolute prediction errors (of the order 10<sup>7</sup>) can be attributed to a few very low <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions points being significantly overpredicted, however, this represents a very small portion of the entire dataset <inline-formula id="inf7">
<mml:math id="m17">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>5</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Subsequently, it can be observed that the mean absolute error (%) is low (in the order of 10<sup>1</sup>) indicating higher overall instantaneous prediction capability. Also, it is important to consider the scale of the <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions while evaluating percentage absolute error. Very small errors in predictions can be blown up when calculating absolute error with respect to the true <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions which are at the scale of 1E-04 and lower. These absolute errors however are still small when compared to the average value of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions in the datasets (0.03&#x2013;0.05&#xa0;g/s) as can be seen from <xref ref-type="fig" rid="F5">Figures 5E&#x2013;H</xref> and <xref ref-type="table" rid="T4">Table&#x20;4</xref> which shows the mean MAE and percent MAE with respect to the maximum <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> over the train, validation and test&#x20;sets.</p>
</sec>
<sec id="s4-3">
<title>4.3 Actual vs Predicted NOx Emissions</title>
<p>
<xref ref-type="fig" rid="F6">Figures 6</xref>, <xref ref-type="fig" rid="F7">7</xref> depict the actual <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions in red and the predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions in blue. Such visualizations aid in understanding the predictive capabilities of the models over the course of training, and clearly highlight points for which the model is overpredicting or underpredicting <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Improvement of instantaneous <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction over DNN training (Engine-Out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model Dataset 1). <bold>(A)</bold> Engine Out NOx Training Epoch 1 Dataset 1. <bold>(B)</bold> Engine Out NOx Training Epoch 200 Dataset 1. <bold>(C)</bold> Engine Out NOx Training Epoch 400 Dataset 1. <bold>(D)</bold> Engine Out NOx Training Epoch 600 Dataset 1.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g006.tif"/>
</fig>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Actual vs Predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions for a portion of the test set (All Models). <bold>(A)</bold> Test Set Engine Out NOx Actual vs Predicted Dataset 1. <bold>(B)</bold> Test Set Tailpipe NOx Actual vs Predicted Dataset 1. <bold>(C)</bold> Test Set Engine Out NOx Actual vs Predicted Dataset 2. <bold>(D)</bold> Test Set Tailpipe NOx Actual vs Predicted Dataset 2.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g007.tif"/>
</fig>
<p>As an example, <xref ref-type="fig" rid="F6">Figure&#x20;6</xref> depicts the improvement in the predictions of the engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> DNN model for Dataset 1, over a small section of the train set as the network learns. <xref ref-type="fig" rid="F6">Figures 6A&#x2013;D</xref> show the actual and predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions for the DNN model at epochs 1, 200, 400 and 600 respectively. It can be observed that as the network is trained, subsequently the predictions (dashed blue line) approach the true values of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions (red line). The increasing overlap of the two lines in <xref ref-type="fig" rid="F6">Figures 6B&#x2013;D</xref> indicates significant improvement in the model predictive capabilities over the course of training.</p>
<p>
<xref ref-type="fig" rid="F7">Figure&#x20;7</xref> shows the <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction results for a portion of the test set for all the models. Sections of the test set have been enlarged below each sub-plot to provide more clarity to the profile for measured and predicted <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. <xref ref-type="fig" rid="F7">Figure&#x20;7B</xref> also includes an enlarged log-scale plot to show the peak tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> predictions more clearly. The DNN models are able to capture both dips and peaks in the engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions effectively as observed from the enlarged plots. The DNN models were successfully able to capture high frequency oscillations in previously unseen test data, while having MAE within 1&#x2013;2% of the full scale of the <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions available in the dataset. Overall, the results are very promising, and the models appear to be suitable for transient <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions estimation in engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> control and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> compliance applications.</p>
</sec>
</sec>
<sec id="s5">
<title>5 Discussion</title>
<p>In this section, three different studies are described which highlight some important aspects of the application of DNNs to <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions predictions. First, an analysis of the effect of type of input data split on the accuracy of developed DNN models is presented. Then, insights from an input feature importance study are discussed. Finally, the effectiveness of the developed DNN models for fault detection in SCR aftertreatment systems is also presented.</p>
<sec id="s5-1">
<title>5.1 Effect of Training Data on Model Accuracy</title>
<p>In this section, the effect of model training data for <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction on the DNN model accuracy has been analyzed. DNN models were trained using data split by two different approaches as described in <xref ref-type="sec" rid="s2-2">Section 2.2</xref>. The results for this experiment on each of the models is presented in <xref ref-type="fig" rid="F8">Figure&#x20;8</xref>. It was observed that when the models were trained using randomly selected training data, they had good accuracy (both <italic>R</italic>
<sup>2</sup> and MSE) on train, validation and test sets. However, when the models were trained using complete test cycles and then tested on different (unseen) complete runs of the same test cycles, the models had less prediction accuracy on the test set. Nevertheless, the models are not overfitting on the training data, as <italic>R</italic>
<sup>2</sup> and MSE of the &#x201c;hold-out&#x201d; validation set is comparable to that of the train set, as can be seen from the striped bars in <xref ref-type="fig" rid="F8">Figure&#x20;8</xref>.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Effect of type of data split on model performance. <bold>(A)</bold> Engine Out NOx Model Dataset 1. <bold>(B)</bold> Tailpipe NOx Model Dataset 1. <bold>(C)</bold> Engine Out NOx Model Dataset 2. <bold>(D)</bold> Tailpipe NOx Model Dataset 2.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g008.tif"/>
</fig>
<p>One explanation for this phenomenon could be that there are variations in <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions measurements across multiple runs of the same test cycle, either due to instrument measurement deviations or accuracy limits or due to the effect of an engine or aftertreatment parameter that is not included in the input features for the DNN models&#x2014;e.g., fuel injection pressure and timing, urea injection or <italic>NH</italic>
<sub>3</sub> slip. The results from this study suggest that DNN models trained using randomly selected data, are capable of learning the different variations in <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> measurements that occur at similar inputs much better than when the model is trained using whole test cycles. The variation in <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions at the same point in the dataset between the test cycle runs in the train set and the test set are unknown to the models when the data is not split randomly. This could be causing the higher error between the model prediction and the true <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> measurement in the test cycle. Therefore, to improve model prediction when using multiple runs of whole test cycles to train the DNN, it could be advisable to add more input features to explain the variation in true <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> measurements. Fuel injection strategies affect the formation of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions (<xref ref-type="bibr" rid="B3">Agarwal et&#x20;al., 2013</xref>). SCR performance is also affected by <italic>NH</italic>
<sub>3</sub>/<italic>NO</italic>
<sub>
<italic>x</italic>
</sub> ratio, while <italic>NH</italic><sub>3</sub> slip could also affect the effectiveness of engine-out to tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> conversion (<xref ref-type="bibr" rid="B27">Girard et&#x20;al., 2007</xref>). Therefore, proprietary ECU parameters such as fuel injection pressure and timing, urea injection timing and quantity and NH<sub>3</sub> slip could be included as inputs to the DNN models to capture the variation in <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions.</p>
</sec>
<sec id="s5-2">
<title>5.2 Input Feature Importance Study</title>
<p>DNN&#x2019;s are inherently black-box models due to their multi-layer nonlinear architecture. DNN&#x2019;s are capable of modelling complex problems with high accuracy but at the expense of losing &#x201c;explainability&#x201d;, i.e.,&#x20;how transparent the model&#x2019;s predictions are to a human (<xref ref-type="bibr" rid="B23">Fan et&#x20;al., 2021</xref>). In the application of predicting <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions, especially for engine control and compliance purposes it is important to discern where and why the model is predicting incorrectly. Therefore, an attempt has been made to determine &#x201c;important&#x201d; inputs to the DNN models presented in this paper that affect model prediction. This study does not completely make the DNN models transparent, but tries to gain some understanding into the inner workings of the DNN&#x2019;s characterization of various engine and aftertreatment parameters to the production of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions.</p>
<p>The DNN models make use of input engine or aftertreatment parameters to learn the complex transient nature of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. Therefore, removing an input feature that is important to the DNN&#x2019;s learning would result in a decrease in model accuracy. This was the underlying principle used to develop an understanding of the relative importance of engine or aftertreatment variables for the DNN models to predict <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. In this study, only a few (5&#x2013;9) input features were used to train the models, and as a consequence this method was easier to implement. After the DNN models were trained, the models (with the same hyperparameters and architecture) were trained again by removing one input feature at a time to find the input feature which, when removed, reduced the model prediction accuracy, i.e.,&#x20;<italic>R</italic>
<sup>2</sup> and MSE. As an example, results for the experiments have been presented for the engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model using Dataset 1 in <xref ref-type="fig" rid="F9">Figure&#x20;9</xref>.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Effect of variable removal on model performance (Engine out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model Dataset 1). <bold>(A)</bold> Effect of Variable Removal on <italic>R</italic>
<sup>2</sup>&#x2014;Engine Out NOx Dataset 1. <bold>(B)</bold> Effect of Variable Removal on MSE&#x2014;Engine Out NOx Dataset 1.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g009.tif"/>
</fig>
<p>However, as can be seen from the <italic>R</italic>
<sup>2</sup> and MSE values for both train and test sets, removal of any single input variable did not seem to affect the model prediction accuracy. The actual physics of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> formation and engine operation could provide an explanation for this phenomenon. <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions are formed due to highly complex chemical and physical phenomena which are affected by parameters that are highly dependent on each other. Therefore, even if one variable or input is removed from the DNN model, information provided by other engine or aftertreatment variables help the DNN to capture the complex process of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> formation in diesel engines. The observed independence could also be because the DNN was sufficiently over-parameterized, i.e.,&#x20;number of parameters in the network exceeds the training points (<inline-formula id="inf8">
<mml:math id="m18">
<mml:mo>&#x223c;</mml:mo>
<mml:mn>1</mml:mn>
<mml:msup>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>6</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> model parameters vs <inline-formula id="inf9">
<mml:math id="m19">
<mml:mo>&#x223c;</mml:mo>
<mml:mn>1</mml:mn>
<mml:msup>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> training points). Therefore, the network is able to learn from existing input features even if one input feature is removed. The two hypotheses could be responsible in conjunction for the observed independence of model accuracy on the removal of single input features.</p>
<p>Further testing was therefore conducted by removing multiple sets of inputs until a significant reduction in model prediction accuracy was achieved to test the model&#x2019;s capability to model <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions with only the most &#x201c;important&#x201d; variables. For the engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model, it was observed that just providing engine speed and torque as inputs to the DNN still resulted in model accuracy (<italic>R</italic>
<sup>2</sup>) of 0.93. This is consistent with the diesel engine operation&#x2014;as the engine speed and torque essentially determine the engine operating conditions, which are highly related to the production of engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. However, the significant reduction in accuracy suggests that the other input features also contribute to the DNN prediction accuracy on a smaller scale when compared to engine speed and torque. Similarly, using chassis dynamometer data which was collected from a hybrid bus, model accuracy, i.e.,&#x20;<italic>R</italic>
<sup>2</sup> of 0.93 was achieved using engine speed and torque along with state of charge and charging current as input features.</p>
<p>For the tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model using the engine dynamometer dataset, utilizing SCR inlet temperature, exhaust mass flow rate and engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> as inputs resulted in a model prediction accuracy with <italic>R</italic>
<sup>2</sup> of 0.95 on the test set. However, for the tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model for the hybrid bus, removing even a single input feature resulted in significant reduction in model prediction accuracy as shown in <xref ref-type="fig" rid="F10">Figure&#x20;10</xref>. This could be attributed to incorrect functioning of the SCR system for this hybrid bus (<xref ref-type="bibr" rid="B20">EPA, 2018</xref>). Subsequently, the data collected to train this tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> DNN model, is not a true representation of the effect of SCR parameters on tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. Considering that this particular DNN model is much &#x201c;deeper&#x201d; and &#x201c;wider&#x201d; than the other tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> DNN model, over-parameterization of the network was inadequate for the network to completely capture incorrect operation of the SCR system.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Effect of variable removal on model performance (tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model Dataset 2). <bold>(A)</bold> Effect of Variable Removal on <italic>R</italic>
<sup>2</sup>&#x2014;Tailpipe NOx Dataset 2. <bold>(B)</bold> Effect of Variable Removal on MSE&#x2014;Tailpipe NOx Dataset 2.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g010.tif"/>
</fig>
<p>The variable importance study however suggests that the DNN models are capable of capturing some aspects of the physics of engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions without the need for any physical or chemical equations. Important engine and aftertreatment variables guide the DNN models to predict with higher accuracy. The study conducted also demonstrates that utilizing minimal information, i.e.,&#x20;two to four physics inspired inputs, the DNN models developed in this study are capable of capturing complex trends of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions in heavy-duty vehicles as indicated by the <italic>R</italic>
<sup>2</sup> values of 0.92 for engine-out and 0.95 for tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub>&#x20;model.</p>
</sec>
<sec id="s5-3">
<title>5.3 DNN as a Fault Detection Tool for Engine and Aftertreatment System</title>
<p>This section presents an example of the application of DNN models such as the ones developed in this paper for detection of anomalies or faults in diesel vehicles. If DNN models using physics inspired inputs are trained using data from functioning engine and aftertreatment systems, the predictions of the model can be compared with data that is obtained from in-use vehicles. This can be applied to detect abnormal <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions occurring either due to a faulty engine operation, incorrectly operating aftertreatment systems or defeat devices. Fault detection can therefore be performed on an engine-level, as well as, aftertreatment-level.</p>
<p>As an example, data was collected from a poorly functioning aftertreatment (SCR) system for the same engine as the one used for engine dynamometer testing in this paper. The DNN model trained using data from a functioning aftertreatment system was used to predict the tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions for this engine. The tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions predictions for all three engine dynamometer test cycles (<xref ref-type="sec" rid="s2-2">Section 2.2</xref>) were then compared with the &#x201c;faulty&#x201d; aftertreatment system data. In <xref ref-type="fig" rid="F11">Figure&#x20;11A</xref>, the blue dashed line indicates the DNN prediction (using functioning aftertreatment system data) and the red line indicates <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions measurements collected from the faulty aftertreatment system. The cumulative total tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions over the 3 test cycles for the faulty aftertreatment system was 29.81&#xa0;g while, the DNN model predicted the expected total tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions (if the aftertreatment was functioning correctly) of 18.6&#xa0;g, as shown in <xref ref-type="fig" rid="F11">Figure&#x20;11B</xref>. The engine with a faulty aftertreatment system produced 60% higher tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions which was successfully detected by the DNN model. This example demonstrates the capability of optimized DNN models to detect <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions anomalies or faults in diesel vehicles and their application for testing and compliance purposes.</p>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Application of DNN for fault detection in SCR aftertreatment Systems. <bold>(A)</bold> Tailpipe NOx Emission-faulty after treatment vs DNN model prediction. <bold>(B)</bold> Cumulative Tailpipe NOx Emission.</p>
</caption>
<graphic xlink:href="fmech-08-840310-g011.tif"/>
</fig>
</sec>
<sec id="s5-4">
<title>5.4 Application of DNN Models to Other Heavy-Duty Engines and Fuels</title>
<p>This section discusses the application of similar DNN models to other heavy-duty engines and fuels. Even though the models developed in this paper have been trained on data from one particular engine, it is expected that due to the use of physics inspired features, the models are capable of capturing the significant features and trends that affect transient <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions in other heavy-duty diesel engines. However, from an instantaneous engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> perspective, since it is a continuous variable and highly dependent on engine design and calibration and fuel injection strategies, trained model accuracy would be reduced when tested directly on other engines not used to train the models developed in this study. Also, from a tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model perspective, SCR aftertreatment systems with different catalysts have different <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> conversion dependencies on the inputs used in the DNN models developed in this study. Therefore, using comprehensive datasets such as the ones developed in this study for other engines and aftertreatment systems, and subsequently applying a similar training process as described in this study should theoretically result in accurate DNN <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions models for different heavy-duty engines. The current DNN models could also be modified to include other inputs that capture the effect of different engine designs and calibration and SCR catalysts along with comprehensive datasets for other engines to evaluate model performance on other heavy-duty engines.</p>
<p>The type of fuel tested to develop the datasets used to train the DNN models in this study could influence the performance of the trained models. The models in this study were trained using data from engine and chassis dynamometer testing running on certification diesel fuel. Therefore, the dataset used to train the DNN models captures the <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions trends for certification diesel fuel. Capturing <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions trends for other fuels such as biodiesel or other higher oxygenated renewable fuels would require subsequent training and optimization using vehicle testing data using these fuels. DNN models developed in this study could be utilized as a base model for initial training using data from a different fuel and then optimized for <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions prediction. A larger dataset that encompasses <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions trends using different fuels on a single engine can be used to train similar DNN models with fuel type as an input. The model performance could then be evaluated to capture the influence of fuels on the DNN <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions predictions.</p>
</sec>
</sec>
<sec id="s6">
<title>6 Conclusions and Future Work</title>
<p>Deep Neural Network (DNN) models were developed using physics inspired inputs to predict transient heavy-duty diesel engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions using engine and aftertreatment variables. The study employed popular and well-established techniques in machine/deep learning to develop engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions models with high predictive power. Based on an in-depth analysis of the DNN models for predicting <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions developed in this study, the following conclusions can be drawn:<list list-type="simple">
<list-item>
<p>1. DNN models using physics inspired inputs are capable of effectively characterizing the complex, nonlinear nature of transient engine-out and tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions. In this study, simple and easily accessible OBD parameters (inputs) were used to develop accurate DNN models. All the models developed in this study have a mean absolute error percentage within 1&#x2013;2% of the maximum <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> measurement, which is comparable to physical <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions measurement analyzer accuracy of 1% of full&#x20;scale.</p>
</list-item>
<list-item>
<p>2. Novel tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models developed using SCR aftertreament variables, such as SCR inlet and outlet temperature, engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> and exhaust mass flow rate, showed good prediction accuracy (<italic>R</italic>
<sup>2</sup> &#x3d; 0.99). However, the DNN tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> model developed using data from a faulty SCR system exhibited lower prediction accuracy (<italic>R</italic>
<sup>2</sup> &#x3d; 0.92) on the test&#x20;set.</p>
</list-item>
<list-item>
<p>3. This study analyzed the effect of type of dataset splitting on the model accuracy. It was shown that randomly splitting the dataset into train and test sets provides a better understanding of cycle-to-cycle <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions variation to the DNN model while training&#x2014;thereby improving model accuracy on the test set. If the DNN models are trained using multiple runs of test cycles as train data, it would be advisable to include more input features that provide additional information to the DNN about the cause of disparity in <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions for similar test cycles.</p>
</list-item>
<list-item>
<p>4. The feature importance study conducted on the DNN models showed the robustness of the models to removal of single input features while training the network. It was also observed that the DNN models had a close understanding of the complex transient nature of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions as they were trained using physics inspired input features. Engine-out <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models showed good correlation with engine operating conditions like engine speed and torque (<italic>R</italic>
<sup>2</sup> &#x3d; 0.93), while tailpipe <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> models exhibited good accuracy even with just three aftertreatment variables as inputs (<italic>R</italic>
<sup>2</sup> &#x3d; 0.95). Interestingly, the DNN models did not perform equally well when trained using data from a poorly functioning aftertreatment system (<italic>R</italic>
<sup>2</sup> &#x3d; 0.92). This indicated that when DNN models for <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions are trained using physics inspired inputs, training data that is not representative of the physics of <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions formation can lead to relatively poor DNN model performance.</p>
</list-item>
<list-item>
<p>5. This work demonstrated that DNN <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions models can be very effective tools for fault detection in Selective Catalytic Reduction (SCR) systems. Cumulative <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> predictions from the DNN model detected that the engine with a faulty aftertreatment (SCR) system produced 60% more total cycle <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> (g) than the expected <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions from a functioning aftertreatment (SCR) system.</p>
</list-item>
</list>
</p>
<p>Future work in this domain will involve the application of similar DNN models to on-road testing data from OBD information and a Portable Emissions Measurement System (PEMS). On-road emissions prediction presents an interesting challenge as environmental variables such as road grade, ambient temperature, pressure and humidity also affect <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions, but their effects are not necessarily captured in the controlled environment of laboratory testing. Use of DNN models that are trained using physics-inspired inputs along with real-world driving effects would help develop models for on-road <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction, which should reduce the disparities in <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions between on-road and laboratory-type tests. However, measurements from low-cost production on-road sensors are less repeatable than those taken from expensive instruments used in engine and chassis dynamometer test cells, and hence it would be challenging to achieve equally high-accuracy models using on-road data. The influence of different types of fuels on DNN model <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> prediction could also be explored by training the models on comprehensive datasets including data from different fuels tested on a heavy-duty vehicle. More heavy-duty engines and aftertreatment systems could be incorporated into the DNN models to assess the model&#x2019;s robustness in predicting <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions from different engine sizes and SCR systems. Successful implementation using comprehensive datasets available from chassis and engine dynamometer testing regularly conducted for compliance purposes could result in a database created to measure cumulative <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions over test cycles for different heavy-duty engines using the DNN models. This would be important to inform the development of future <italic>NO</italic>
<sub>
<italic>x</italic>
</sub> emissions regulations and for validating real-world emissions measurements against expected performance.</p>
</sec>
</body>
<back>
<sec id="s7">
<title>Data Availability Statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s8">
<title>Author Contributions</title>
<p>Specific contributions to the paper are as follows: RP: Data Curation, Formal Analysis, Methodology, Software, Validation, Investigation and Writing - original draft. VT: Investigation, Methodology and Validation. ASB: Investigation, Methodology, Software and Validation. MB: Resources and Supervision. RS: Resources and Data Curation. TN: Resources, Project Administration and Funding Acquisition. ALB: Resources, Supervision, Project administration and Funding Acquisition.</p>
</sec>
<sec id="s9">
<title>Funding</title>
<p>Funding for this study was provided by Horiba Instruments&#x20;Inc.</p>
</sec>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of Interest</title>
<p>TN was employed by Horiba Instruments&#x20;Inc. This study received funding from Horiba Instruments Inc. The funder was involved in resources, project administration and funding acquisition.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>The authors would like to thank Maria Peralta, Advanced Testing Center Director, United&#x20;States EPA, NVFEL, Garrett Brown, Scott Ludlam and Robert Caldwell at the United&#x20;States EPA, NVFEL for conducting the chassis dynamometer tests at the Heavy-Duty Chassis Dynamometer Test Facility and providing data for this&#x20;study.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Abadi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Barham</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Davis</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dean</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). &#x201c;<article-title>Tensorflow: A System for Large-Scale Machine Learning</article-title>,&#x201d; in <conf-name>Proceeding of the 12th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 16)</conf-name>, <conf-loc>Savannah, GA, USA</conf-loc>, <conf-date>November 2016</conf-date>, <fpage>265</fpage>&#x2013;<lpage>283</lpage>. </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abiodun</surname>
<given-names>O. I.</given-names>
</name>
<name>
<surname>Jantan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Omolara</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Dada</surname>
<given-names>K. V.</given-names>
</name>
<name>
<surname>Mohamed</surname>
<given-names>N. A.</given-names>
</name>
<name>
<surname>Arshad</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>State-of-the-art in Artificial Neural Network Applications: A Survey</article-title>. <source>Heliyon</source> <volume>4</volume>, <fpage>e00938</fpage>. <pub-id pub-id-type="doi">10.1016/j.heliyon.2018.e00938</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Agarwal</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Srivastava</surname>
<given-names>D. K.</given-names>
</name>
<name>
<surname>Dhar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Maurya</surname>
<given-names>R. K.</given-names>
</name>
<name>
<surname>Shukla</surname>
<given-names>P. C.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>A. P.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Effect of Fuel Injection Timing and Pressure on Combustion, Emissions and Performance Characteristics of a Single cylinder Diesel Engine</article-title>. <source>Fuel</source> <volume>111</volume>, <fpage>374</fpage>&#x2013;<lpage>383</lpage>. <pub-id pub-id-type="doi">10.1016/j.fuel.2013.03.016</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Amodei</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ananthanarayanan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Anubhai</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Battenberg</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Case</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). &#x201c;<article-title>Deep Speech 2: End-To-End Speech Recognition in English and Mandarin</article-title>,&#x201d; in <conf-name>Proceeding of the International conference on machine learning</conf-name>, <conf-date>June 2016</conf-date> (<publisher-loc>Brookline, MA</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>173</fpage>&#x2013;<lpage>182</lpage>. </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arsie</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Cricchio</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>De Cesare</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pianese</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Sorrentino</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>A Methodology to Enhance Design and On-Board Application of Neural Network Models for Virtual Sensing of Nox Emissions in Automotive Diesel Engines</article-title>. <source>SAE Tech. Pap.</source> <volume>6</volume>. <pub-id pub-id-type="doi">10.4271/2013-24-0138</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Askin</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Barter</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>West</surname>
<given-names>T. H.</given-names>
</name>
<name>
<surname>Manley</surname>
<given-names>D. K.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>The Heavy-Duty Vehicle Future in the united states: A Parametric Analysis of Technology and Policy Tradeoffs</article-title>. <source>Energy Policy</source> <volume>81</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1016/j.enpol.2015.02.005</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bagheri</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Walker</surname>
<given-names>P. D.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.&#x20;L.</given-names>
</name>
<name>
<surname>Surawski</surname>
<given-names>N. C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Strategies for Improving the Emission Performance of Hybrid Electric Vehicles</article-title>. <source>Sci. Total Environ.</source> <volume>771</volume>, <fpage>144901</fpage>. <pub-id pub-id-type="doi">10.1016/j.scitotenv.2020.144901</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baldi</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Sadowski</surname>
<given-names>P. J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Understanding Dropout</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>26</volume>, <fpage>2814</fpage>&#x2013;<lpage>2822</lpage>. </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bellone</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Faghani</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Karayiannidis</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Comparison of Cnn and Lstm for Modeling Virtual Sensors in an Engine</article-title>. <source>SAE Tech. Pap.</source> <volume>2</volume>, <fpage>2632</fpage>&#x2013;<lpage>2639</lpage>. <pub-id pub-id-type="doi">10.4271/2020-01-0735</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Deep Learning of Representations for Unsupervised and Transfer Learning</article-title>. <source>JMLR: Workshop Conf. Proc.</source> <volume>27</volume>, <fpage>17</fpage>&#x2013;<lpage>27</lpage>. </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bishop</surname>
<given-names>C. M.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>Neural Networks and Their Applications</article-title>. <source>Rev. scientific Instr.</source> <volume>65</volume>, <fpage>1803</fpage>&#x2013;<lpage>1832</lpage>. <pub-id pub-id-type="doi">10.1063/1.1144830</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Boningari</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Smirniotis</surname>
<given-names>P. G.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Impact of Nitrogen Oxides on the Environment and Human Health: Mn-Based Materials for the NO X Abatement</article-title>. <source>Curr. Opin. Chem. Eng.</source> <volume>13</volume>, <fpage>133</fpage>&#x2013;<lpage>141</lpage>. <pub-id pub-id-type="doi">10.1016/j.coche.2016.09.004</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bowman</surname>
<given-names>C. T.</given-names>
</name>
</person-group> (<year>1975</year>). <article-title>Kinetics of Pollutant Formation and Destruction in Combustion</article-title>. <source>Prog. Energ. Combustion Sci.</source> <volume>1</volume>, <fpage>33</fpage>&#x2013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1016/0360-1285(75)90005-2</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brown</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Fleming</surname>
<given-names>K. L.</given-names>
</name>
<name>
<surname>Safford</surname>
<given-names>H. R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Prospects for a Highly Electric Road Transportation Sector in the usa</article-title>. <source>Curr. Sustainable/Renewable Energ. Rep.</source> <volume>7</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1007/s40518-020-00155-3</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Camporeale</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Ciliberti</surname>
<given-names>P. D.</given-names>
</name>
<name>
<surname>Carlucci</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ingrosso</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2017</year>). <source>Dynamic Validation and Sensitivity Analysis of a Nox Estimation Model Based on in-cylinder Pressure Measurement</source>. <publisher-loc>Warrendale, PA</publisher-loc>: <publisher-name>SAE International</publisher-name>. <pub-id pub-id-type="doi">10.4271/2017-24-0131</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ciregan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Meier</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Schmidhuber</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>Multi-column Deep Neural Networks for Image Classification</article-title>,&#x201d; in <conf-name>2012 IEEE conference on computer vision and pattern recognition</conf-name>, <conf-loc>Providence, RI, USA</conf-loc>, <conf-date>June 2012</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>3642</fpage>&#x2013;<lpage>3649</lpage>. </citation>
</ref>
<ref id="B17">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Clevert</surname>
<given-names>D.-A.</given-names>
</name>
<name>
<surname>Unterthiner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Hochreiter</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). <source>Fast and Accurate Deep Network Learning by Exponential Linear Units (Elus)</source>. <comment>arXiv preprint arXiv:1511.07289</comment>. </citation>
</ref>
<ref id="B18">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dahifale</surname>
<given-names>B. S.</given-names>
</name>
<name>
<surname>Patil</surname>
<given-names>A. S.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Diesel Engine Performance Improvement for Constant Speed Application Using Cfd</article-title>,&#x201d; in <source>ASME International Mechanical Engineering Congress and Exposition</source> (<publisher-name>American Society of Mechanical Engineers</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1115/imece2017-70012</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Donateo</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Filomena</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Real Time Estimation of Emissions in a Diesel Vehicle with Neural Networks</article-title>,&#x201d;. <source>E3S Web of Conferences (EDP Sciences)</source>, <volume>197</volume>, <fpage>06</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1051/e3sconf/202019706020</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="book">
<collab>EPA</collab> (<year>2018</year>). <source>EPA Announces Largest Voluntary Recall of Medium- and Heavy-Duty Trucks</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://archive.epa.gov/epa/newsreleases/epa-announces-largest-voluntary-recall-medium-and-heavy-duty-trucks.html">https://archive.epa.gov/epa/newsreleases/epa-announces-largest-voluntary-recall-medium-and-heavy-duty-trucks.html</ext-link>
</comment>. </citation>
</ref>
<ref id="B21">
<citation citation-type="book">
<collab>EPA</collab> (<year>2021a</year>). <source>CFR Title 40 Part 1065 - Engine Testing Procedures</source>. <publisher-name>Legal Information Institute</publisher-name>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.ecfr.gov/current/title-40/chapter-I/subchapter-U/part-1065">https://www.ecfr.gov/current/title-40/chapter-I/subchapter-U/part-1065</ext-link>
</comment>. </citation>
</ref>
<ref id="B22">
<citation citation-type="book">
<collab>EPA</collab> (<year>2021b</year>). <source>Regulations for Emissions from Vehicles and Engines</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.epa.gov/regulations-emissions-vehicles-and-engines/cleaner-trucks-initiative">https://www.epa.gov/regulations-emissions-vehicles-and-engines/cleaner-trucks-initiative</ext-link>
</comment>. </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>F. L.</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>On Interpretability of Artificial Neural Networks: A Survey</article-title>. <source>IEEE Trans. Radiat. Plasma Med. Sci.</source> <volume>6</volume> (<issue>5</issue>), <fpage>741</fpage>&#x2013;<lpage>760</lpage>. <pub-id pub-id-type="doi">10.1109/trpms.2021.3066428</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Feurer</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hutter</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Hyperparameter Optimization</article-title>,&#x201d; in <source>Automated Machine Learning</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>3</fpage>&#x2013;<lpage>33</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-05318-5_1</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fischer</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Transient Nox Estimation Using Artificial Neural Networks</article-title>. <source>IFAC Proc. Volumes</source> <volume>46</volume>, <fpage>101</fpage>&#x2013;<lpage>106</lpage>. <pub-id pub-id-type="doi">10.3182/20130904-4-jp-2042.00006</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ge</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kakade</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Kidambi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Netrapalli</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>The Step Decay Schedule: A Near Optimal, Geometrically Decaying Learning Rate Procedure for Least Squares</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>32</volume>, <fpage>14977</fpage>&#x2013;<lpage>14988</lpage>. </citation>
</ref>
<ref id="B27">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Girard</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Snow</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Cavataio</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lambert</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2007</year>). <source>The Influence of Ammonia to Nox Ratio on Scr Performance</source>. <publisher-loc>Warrendale, PA</publisher-loc>: <publisher-name>SAE International</publisher-name>. <pub-id pub-id-type="doi">10.4271/2007-01-1581</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gluck</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Glenn</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Logan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Vu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Walsh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Evaluation of Nox Flue Gas Analyzers for Accuracy and Their Applicability for Low-Concentration Measurements</article-title>. <source>J.&#x20;Air Waste Manag. Assoc.</source> <volume>53</volume>, <fpage>749</fpage>&#x2013;<lpage>758</lpage>. <pub-id pub-id-type="doi">10.1080/10473289.2003.10466208</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Goodfellow</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Courville</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <source>Deep Learning</source>. <publisher-name>MIT press</publisher-name>. </citation>
</ref>
<ref id="B30">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Heywood</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <source>Internal Combustion Engine Fundamentals</source>. <edition>2nd Edition</edition>. <publisher-loc>New York</publisher-loc>: <publisher-name>McGraw-Hill</publisher-name>. </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Johri</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Filipi</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Neuro-fuzzy Model Tree Approach to Virtual Sensing of Transient Diesel Soot and Nox Emissions</article-title>. <source>Int. J.&#x20;Engine Res.</source> <volume>15</volume>, <fpage>918</fpage>&#x2013;<lpage>927</lpage>. <pub-id pub-id-type="doi">10.1177/1468087413492962</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Khair</surname>
<given-names>M. K.</given-names>
</name>
<name>
<surname>Majewski</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2006</year>). <source>Diesel Emissions and Their Control</source>. <publisher-loc>Warrendale, PA</publisher-loc>: <publisher-name>SAE International</publisher-name>. </citation>
</ref>
<ref id="B33">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kingma</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Ba</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2014</year>). <source>Adam: A Method for Stochastic Optimization</source>. <comment>arXiv preprint arXiv:1412.6980</comment> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koebel</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Madia</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Elsener</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Selective Catalytic Reduction of No and No2 at Low Temperatures</article-title>. <source>Catal. Today</source> <volume>73</volume>, <fpage>239</fpage>&#x2013;<lpage>247</lpage>. <pub-id pub-id-type="doi">10.1016/s0920-5861(02)00006-8</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kotsiantis</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Kanellopoulos</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Pintelas</surname>
<given-names>P. E.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Data Preprocessing for Supervised Leaning</article-title>. <source>Int. J.&#x20;Comput. Sci.</source> <volume>1</volume>, <fpage>111</fpage>&#x2013;<lpage>117</lpage>. <pub-id pub-id-type="doi">10.4304/jcp.1.4.30-37</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lavoie</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Heywood</surname>
<given-names>J.&#x20;B.</given-names>
</name>
<name>
<surname>Keck</surname>
<given-names>J.&#x20;C.</given-names>
</name>
</person-group> (<year>1970</year>). <article-title>Experimental and Theoretical Study of Nitric Oxide Formation in Internal Combustion Engines</article-title>. <source>Combustion Sci. Tech.</source> <volume>1</volume>, <fpage>313</fpage>&#x2013;<lpage>326</lpage>. <pub-id pub-id-type="doi">10.1080/00102206908952211</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep Learning</article-title>. <source>Nature</source> <volume>521</volume>, <fpage>436</fpage>&#x2013;<lpage>444</lpage>. <pub-id pub-id-type="doi">10.1038/nature14539</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Proposal of a Methodology for Designing Engine Operating Variables Using Predicted Nox Emissions Based on Deep Neural Networks</article-title>. <source>J.&#x20;Mech. Sci. Technol.</source> <volume>35</volume>, <fpage>1747</fpage>&#x2013;<lpage>1756</lpage>. <pub-id pub-id-type="doi">10.1007/s12206-021-0337-2</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Maas</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Hannun</surname>
<given-names>A. Y.</given-names>
</name>
<name>
<surname>Ng</surname>
<given-names>A. Y.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Rectifier Nonlinearities Improve Neural Network Acoustic Models</article-title>,&#x201d;. <source>Proc. Icml (Citeseer)</source> (<publisher-loc>CA 94305 USA</publisher-loc>: <publisher-name>Computer Science Department, Stanford University</publisher-name>), <volume>30</volume>. </citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mentink</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Seykens</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Escobar Valdivieso</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Development and Application of a Virtual Nox Sensor for Robust Heavy Duty Diesel Engine Emission Control</article-title>. <source>SAE Int. J.&#x20;Engines</source> <volume>10</volume>, <fpage>1297</fpage>&#x2013;<lpage>1304</lpage>. <pub-id pub-id-type="doi">10.4271/2017-01-0951</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Merryman</surname>
<given-names>E. L.</given-names>
</name>
<name>
<surname>Levy</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>1975</year>). <article-title>Nitrogen Oxide Formation in Flames: The Roles of No2 and Fuel Nitrogen</article-title>. <source>Symp. (International) Combustion</source> <volume>15</volume>, <fpage>1073</fpage>&#x2013;<lpage>1083</lpage>. <pub-id pub-id-type="doi">10.1016/S0082-0784(75)80372-9</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mobasheri</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Mirsalim</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Analysis the Effect of Advanced Injection Strategies on Engine Performance and Pollutant Emissions in a Heavy Duty Di-diesel Engine by Cfd Modeling</article-title>. <source>Int. J.&#x20;Heat Fluid Flow</source> <volume>33</volume>, <fpage>59</fpage>&#x2013;<lpage>69</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijheatfluidflow.2011.10.004</pub-id> </citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohammad</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rezaei</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hayduk</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Delebinski</surname>
<given-names>T. O.</given-names>
</name>
<name>
<surname>Shahpouri</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shahbakhti</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Hybrid Physical and Machine Learning-Oriented Modeling Approach to Predict Emissions in a Diesel Compression Ignition Engine</article-title>. <source>SAE Tech. Paper</source>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.4271/2021-01-0496</pub-id> </citation>
</ref>
<ref id="B44">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Nair</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G. E.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Rectified Linear Units Improve Restricted Boltzmann Machines</article-title>,&#x201d; in <conf-name>Proceedings of the 27th International Conference on International Conference on Machine Learning</conf-name>, <conf-date>June 2010</conf-date>, <fpage>807</fpage>&#x2013;<lpage>814</lpage>. </citation>
</ref>
<ref id="B45">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Panneer Selvam</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Shekhar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Northrop</surname>
<given-names>W. F.</given-names>
</name>
</person-group> (<year>2021</year>). <source>Prediction of NOx Emissions from Compression Ignition Engines Using Ensemble Learning-Based Models with Physical Interpretability</source>. <comment>SAE Technical Paper</comment>. <pub-id pub-id-type="doi">10.4271/2021-24-0082</pub-id> </citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedregosa</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Varoquaux</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gramfort</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Michel</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Thirion</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Grisel</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Scikit-learn: Machine Learning in Python</article-title>. <source>J.&#x20;machine Learn. Res.</source> <volume>12</volume>, <fpage>2825</fpage>&#x2013;<lpage>2830</lpage>. </citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Provataris</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Savva</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>Chountalas</surname>
<given-names>T. D.</given-names>
</name>
<name>
<surname>Hountalas</surname>
<given-names>D. T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Prediction of NOx Emissions for High Speed DI Diesel Engines Using a Semi-empirical, Two-Zone Model</article-title>. <source>Energ. Convers. Manag.</source> <volume>153</volume>, <fpage>659</fpage>&#x2013;<lpage>670</lpage>. <pub-id pub-id-type="doi">10.1016/j.enconman.2017.10.007</pub-id> </citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Refaeilzadeh</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Cross-validation</article-title>. <source>Encyclopedia database Syst.</source> <volume>5</volume>, <fpage>532</fpage>&#x2013;<lpage>538</lpage>. <pub-id pub-id-type="doi">10.1007/978-0-387-39940-9_565</pub-id> </citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rumelhart</surname>
<given-names>D. E.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>R. J.</given-names>
</name>
</person-group> (<year>1986</year>). <article-title>Learning Representations by Back-Propagating Errors</article-title>. <source>Nature</source> <volume>323</volume>, <fpage>533</fpage>&#x2013;<lpage>536</lpage>. <pub-id pub-id-type="doi">10.1038/323533a0</pub-id> </citation>
</ref>
<ref id="B50">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Sankararaman</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>De</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W. R.</given-names>
</name>
<name>
<surname>Goldstein</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>The Impact of Neural Network Overparameterization on Gradient Confusion and Stochastic Gradient Descent</article-title>,&#x201d; in <source>International Conference on Machine Learning</source> (<publisher-loc>Brookline, MA</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>8469</fpage>&#x2013;<lpage>8479</lpage>. </citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Min</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Deep Neural Network Model with Bayesian Hyperparameter Optimization for Prediction of NOx at Transient Conditions in a Diesel Engine</article-title>. <source>Eng. Appl. Artif. Intelligence</source> <volume>94</volume>, <fpage>103761</fpage>. <pub-id pub-id-type="doi">10.1016/j.engappai.2020.103761</pub-id> </citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Silver</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Maddison</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Guez</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sifre</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Van Den Driessche</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Mastering the Game of Go with Deep Neural Networks and Tree Search</article-title>. <source>Nature</source> <volume>529</volume>, <fpage>484</fpage>&#x2013;<lpage>489</lpage>. <pub-id pub-id-type="doi">10.1038/nature16961</pub-id> </citation>
</ref>
<ref id="B53">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Smith</surname>
<given-names>L. N.</given-names>
</name>
</person-group> (<year>2018</year>). <source>A Disciplined Approach to Neural Network Hyper-Parameters: Part 1&#x2013;Learning Rate, Batch Size, Momentum, and Weight Decay</source>. <comment>arXiv preprint arXiv:1803.09820</comment>. </citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Srivastava</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Krizhevsky</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sutskever</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Salakhutdinov</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Dropout: A Simple Way to Prevent Neural Networks from Overfitting</article-title>. <source>J.&#x20;Machine Learn. Res.</source> <volume>15</volume>, <fpage>1929</fpage>&#x2013;<lpage>1958</lpage>. </citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stone</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1978</year>). <article-title>Cross-validation: A Review</article-title>. <source>Ser. Stat.</source> <volume>9</volume>, <fpage>127</fpage>&#x2013;<lpage>139</lpage>. <pub-id pub-id-type="doi">10.1080/02331887808801414</pub-id> </citation>
</ref>
<ref id="B56">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Shrivastava</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gupta</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Revisiting Unreasonable Effectiveness of Data in Deep Learning Era</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name>, <conf-loc>Venice, Italy</conf-loc>, <conf-date>Oct. 2017</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>843</fpage>&#x2013;<lpage>852</lpage>. <pub-id pub-id-type="doi">10.1109/iccv.2017.97</pub-id> </citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wan</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Influence of Feature Scaling on Convergence of Gradient Iterative Algorithm</article-title>. <source>J.&#x20;Phys. Conf. Ser.</source> <volume>1213</volume>, <fpage>032021</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/1213/3/032021</pub-id> </citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Winkler</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Anderson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Garza</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ruona</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Vogt</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wallington</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Vehicle Criteria Pollutant (PM, NOx, CO, HCs) Emissions: How Low Should We Go?</article-title>. <source>npj&#x20;Clim. Atmos. Sci.</source> <volume>1</volume>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.1038/s41612-018-0037-5</pub-id> </citation>
</ref>
<ref id="B59">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>You</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jordan</surname>
<given-names>M. I.</given-names>
</name>
</person-group> (<year>2019</year>). <source>How Does Learning Rate Decay Help Modern Neural Networks</source>. <comment>arXiv:1908.01878 (cs, stat)</comment>. </citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>A. N.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A Novel Deep Learning Approach to Predict the Instantaneous NO&#x2093; Emissions from Diesel Engine</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>11002</fpage>&#x2013;<lpage>11013</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3050165</pub-id> </citation>
</ref>
<ref id="B61">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Pennycott</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Burke</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Akehurst</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Brace</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <source>Predicting the Nitrogen Oxides Emissions of a Diesel Engine Using Neural Networks</source>. <publisher-loc>Warrendale, PA</publisher-loc>: <publisher-name>SAE International</publisher-name>. <pub-id pub-id-type="doi">10.4271/2015-01-1626</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>