<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Chem.</journal-id>
<journal-title>Frontiers in Chemistry</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Chem.</abbrev-journal-title>
<issn pub-type="epub">2296-2646</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1353745</article-id>
<article-id pub-id-type="doi">10.3389/fchem.2024.1353745</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Chemistry</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Quantitative analysis of pyrolysis characteristics and chemical components of tobacco materials based on machine learning</article-title>
<alt-title alt-title-type="left-running-head">Wu et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fchem.2024.1353745">10.3389/fchem.2024.1353745</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wu</surname>
<given-names>Zhifeng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2599362/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Qi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yu</surname>
<given-names>Hongxiao</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fu</surname>
<given-names>Lili</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Zhen</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lu</surname>
<given-names>Yan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Guo</surname>
<given-names>Zhongya</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Yasen</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Xiansheng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Yingjie</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Le</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Zhengzhou Tobacco Research Institute of CNTC</institution>, <addr-line>Zhengzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Technology Center</institution>, <institution>China Tobacco Shandong Industrial Co., Ltd.</institution>, <addr-line>Jinan</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Ministry and Municipality Jointly Build the Key Laboratory of Sichuan Province for Efficient Utilization of Domestic Cigar Tobacco Leaf Industry</institution>, <addr-line>Chengdu</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Technology Center</institution>, <institution>China Tobacco Guangdong Industrial Co., Ltd.</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Qingzhou Cigarette Factory</institution>, <institution>China Tobacco Shandong Industrial Co., Ltd.</institution>, <addr-line>Qingzhou</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2562879/overview">Cai Liang</ext-link>, Southeast University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1990899/overview">Shiyong Wu</ext-link>, East China University of Science and Technology, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2603823/overview">Ziliang Wang</ext-link>, Shandong University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Le Wang, <email>wangl@ztri.com.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>06</day>
<month>02</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1353745</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>12</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>02</day>
<month>01</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Wu, Zhang, Yu, Fu, Yang, Lu, Guo, Li, Zhou, Liu and Wang.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Wu, Zhang, Yu, Fu, Yang, Lu, Guo, Li, Zhou, Liu and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>To investigate the quantitative relationship between the pyrolysis characteristics and chemical components of tobacco materials, various machine learning methods were used to establish a quantitative analysis model of tobacco. The model relates the thermal weight loss rate to 19 chemical components, and identifies the characteristic temperature intervals of the pyrolysis process that significantly relate to the chemical components. The results showed that: 1) Among various machine learning methods, partial least squares (PLS), support vector regression (SVR) and Gaussian process regression (GPR) demonstrated superior regression performance on thermogravimetric data and chemical components. 2) The PLS model showed the best performance on fitting and prediction effects, and has good generalization ability to predict the 19 chemical components. For most components, the determination coefficients <italic>R</italic>
<sup>2</sup> are above 0.85. While the performance of SVR and GPR models was comparable, the <italic>R</italic>
<sup>2</sup> for most chemical components were below 0.75. 3) The significant temperature intervals for various chemical components were different, and most of the affected temperature intervals were within 130&#xb0;C&#x2013;400&#xb0;C. The results can provide a reference for the materials selection of cigarette and reveal the possible interactions of various chemical components of tobacco materials in the pyrolysis process.</p>
</abstract>
<kwd-group>
<kwd>tobacco material</kwd>
<kwd>chemical components</kwd>
<kwd>thermogravimetric analysis</kwd>
<kwd>machine learning</kwd>
<kwd>characteristic temperature range</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Green and Sustainable Chemistry</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>The chemical components of tobacco raw materials affect the quality of tobacco and its products (<xref ref-type="bibr" rid="B20">Thielen et al., 2008</xref>). The conventional chemical components of tobacco raw materials, such as total sugars, reducing sugars, nicotine, total nitrogen, potassium and chlorine, are widely used in the formulation design of cigarettes (<xref ref-type="bibr" rid="B22">Xia et al., 2009</xref>; <xref ref-type="bibr" rid="B4">Chen et al., 2021</xref>), quality control of raw material (<xref ref-type="bibr" rid="B19">Tang et al., 2020</xref>; <xref ref-type="bibr" rid="B14">Losso et al., 2022</xref>), and grading of raw material (<xref ref-type="bibr" rid="B12">Kr&#xfc;semann et al., 2019</xref>; <xref ref-type="bibr" rid="B13">Kurt, 2021</xref>). At present, the continuous flow analyzer method is mainly used to detect the content of conventional chemical components of tobacco. However, this method is complex and time-consuming, and it causes environmental pollution because a large quantity of organic reagents is consumed during the detection process (<xref ref-type="bibr" rid="B16">Peng et al., 2022</xref>).</p>
<p>In view of the problems of long cycle time and poor reproducibility of conventional chemical content determination, Near Infrared Reflectance Spectroscopy (NIRS) has become an important method for the quantitative analysis of tobacco chemical components due to its simple sample preparation and fast analysis speed (<xref ref-type="bibr" rid="B7">Duan et al., 2012</xref>). The combination of chemical compositional measures with spectroscopic methods capable of characterizing the chemical information of a sample has been widely used in many practical applications. For example, <xref ref-type="bibr" rid="B3">Bi et al. (2015)</xref> combined infrared spectral data from petrol and tobacco with machine learning algorithms to show superior prediction performance for octane number in petrol and nicotine content in tobacco, respectively. <xref ref-type="bibr" rid="B24">Zhang et al. (2008)</xref> analyzed the near-infrared spectral differences of tobacco samples and found that the support vector machine method can be effective for rapid and accurate analysis of conventional chemical components in tobacco. <xref ref-type="bibr" rid="B21">Wei et al. (2022)</xref> utilized NIRS and a machine learning method to realize online monitoring of moisture, starch and other components of tobacco raw material.</p>
<p>The pyrolysis of tobacco, as a special type of biomass, is a very complex process due to the simultaneous presence of cellulose, hemicellulose, lignin and extractives in different ratios, resulting in multiple simultaneous reactions occurring in series and in parallel (<xref ref-type="bibr" rid="B1">Balsora et al., 2022</xref>; <xref ref-type="bibr" rid="B9">Guo et al., 2022</xref>). Tobacco components are degraded by different mechanisms and pathways in different temperature ranges. Pyrolysis as a thermochemical conversion process can be used to extract valuable chemicals from tobacco biomass. In addition to the NIR method discussed earlier, Thermogravimetric Analysis (TGA) is also widely used in classification, sensory quality evaluation and aroma type judgment of tobacco raw material (<xref ref-type="bibr" rid="B8">Guo et al., 2019</xref>; <xref ref-type="bibr" rid="B5">Danieli et al., 2022</xref>; <xref ref-type="bibr" rid="B10">Heng et al., 2022</xref>). In addition, pyrolysis data were used to predict pyrolysis products and to model kinetics in many previous studies. For example, <xref ref-type="bibr" rid="B18">Sun et al. (2016)</xref> developed and trained an artificial neural network (ANN) to investigate the effects of operating temperature, biomass particle size and space velocity on the pyrolysis products of pine shavings, and good agreement was achieved between the experimental and simulation results. <xref ref-type="bibr" rid="B23">Yin et al. (2021)</xref> used TGA data with the support vector machine (SVM) method to classify tobacco raw materials by grade and growth region, and achieved high-precision classification of tobacco raw material grade and growth region.</p>
<p>Based on the pyrolysis characteristics of tobacco, the biggest obstacle to building fast and accurate models for quantitative chemical composition analysis is the complex, non-linear relationship between the pyrolysis behavior of tobacco and its complex chemical composition (<xref ref-type="bibr" rid="B17">Strandberg et al., 2017</xref>). The potential of machine learning methods to reveal the relationship between several independent variables and several dependent variables is considerable (<xref ref-type="bibr" rid="B11">Jordan and Mitchell, 2015</xref>; <xref ref-type="bibr" rid="B6">Dobbelaere et al., 2021</xref>). Machine learning methods have been shown to be powerful in dealing with non-linear problems, for example, by using Partial Least Squares (PLS) to model the fitted data or by putting complex relationships into black boxes to build neural network models, both of which are effective in solving non-linear problems associated with complex data. This has been demonstrated in many chemical analyses related to spectra. However, the quantitative analysis of pyrolysis characteristics and chemical components of tobacco raw materials by TGA methods has rarely been reported. Based on the above background, the aim of this study is to model the quantitative relationship between pyrolysis properties and chemical composition of tobacco using machine learning methods. Specifically, 157 tobacco samples were first analyzed chemically and thermogravimetrically. Then the DTG data and chemical composition were used as inputs to establish quantitative relationships using different machine learning models, and the preferred models were used to screen the characteristic temperature intervals.</p>
<p>The study has the following novelty. First, it is the first to directly model the quantitative relationship between the pyrolysis behavior of tobacco and 19 types of chemical information. Second, this paper finds the best model applicable between complex thermogravimetric data and chemical information by comparing different machine learning methods. Third, the temperature intervals with high correlation between different chemicals corresponding to the pyrolysis reaction process are screened out, which can provide a basis for the possible synergistic, coupling and other interaction effects of different chemicals in the pyrolysis process.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Materials and sample preparation</title>
<p>The 157 tobacco samples originated from Brazil, Zimbabwe, and 104 counties in six provinces in China&#x2019;s major tobacco producing regions, including Henan, Yunnan, and Guizhou provinces. The tobacco collection years included 2017, 2018, and 2019. Tobacco can be classified into 35 grades according to the national standard GB 2635-1992. After being placed in a constant temperature and humidity chamber with a temperature of (22 &#xb1; 1) &#xb0;C and relative humidity of (60 &#xb1; 2)% for 48&#xa0;h to reach equilibrium, the tobacco leaf samples were pulverized by means of a high-speed grinder and screened by a 60 mesh (250&#xa0;&#x3bc;m) sieve for further use.</p>
<p>A total of 157 tobacco samples were studied. The chemical information included total alkaloids, reducing sugars, total sugars, total nitrogen, potassium, chloride, starch, dichloromethane extract, solanesol, phosphate, magnesium, calcium, polyphenols, refractory acids, total amino acids, Amadori compounds, neophytadiene, and pH. Tobacco samples were prepared as solutions according to tobacco industry standards and then analysed directly for total alkaloids, reducing sugars, total sugars, total nitrogen, potassium, chloride, starch, dichloromethane extract, phosphate, magnesium, calcium, refractory acids, total amino acids, Amadori compounds, and neophytadiene, using a flow analyser (Alliance-Futura). The contents of polyphenols and solanesol were determined using a liquid chromatograph. pH values were measured with a Mettler-Toledo Seven Compact pH meter.</p>
</sec>
<sec id="s2-2">
<title>2.2 Methods</title>
<sec id="s2-2-1">
<title>2.2.1 Thermogravimetric analysis</title>
<p>Thermogravimetric analyses of the tobacco samples were performed using a Discovery thermogravimetric analyzer produced by TA Instruments. For each thermogravimetric test, (10.0 &#xb1; 0.5) mg of tobacco powder was weighed; the flow rate of purge gas (nitrogen) in the reaction zone of the thermogravimetric analyzer was set at 30&#xa0;mL/min, and the flow rate of protection gas (nitrogen) was set at 20&#xa0;mL/min. The samples were heated from 40&#xb0;C to 105&#xb0;C at a rate of 10&#xb0;C/min and held for 30&#xa0;min to remove the water in the samples, then heated to 800&#xb0;C at a heating rate of 10&#xb0;C/min. During the test, 120 data points were recorded per minute for each sample, and the time-dependent mass loss data in the range of 105&#x2013;800&#xb0;C were selected. The corresponding DTG results were then obtained by normalizing and differencing the temperature-based TGA curves.</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Machine learning methods</title>
<p>Partial Least Squares Regression (PLS) is a multivariate statistical analysis method that extracts the latent variables with the highest correlation to the dependent variable by reducing the dimensionality of the independent variables, and then performs regression analysis on these latent variables. Compared to Principal Component Regression (PCR), PLS combines the advantages of multivariate linear regression methods and considers the relationship between independent variables and dependent variables in the selection of latent components, making it effective in handling high-dimensional data. Based on Bayesian methods, Gaussian Process Regression (GPR) is a non-parametric regression method. GPR uses a Gaussian process as a prior distribution of the data and updates the posterior distribution based on observed data to predict the new input. Support Vector Regression (SVR) is a regression method based on Support Vector Machines, minimizing loss and maximizing margin to derive the model. SVR can perform linear regression in a multidimensional feature space and achieve non-linear regression through the use of kernel functions. Random Forest Regression (RF) is a regression algorithm based on ensemble learning that constructs multiple decision trees and averages their predictions to obtain the final result. Neural Networks (NN) estimate or approximately estimate functions by connecting a large number of neurons, which can handle high-dimensional data and adapt to non-linear or complex data relationships.</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Model evaluation index</title>
<p>There are various metrics for machine learning to assess the effectiveness of model fitting. Root Mean Squared Error (RMSE) and coefficient of determination (<italic>R</italic>
<sup>2</sup>) are usually used to evaluate the regression models. RMSE measures the degree of deviation between the predicted values and the actual values, where a smaller RMSE indicates a higher level of model fitting accuracy. <italic>R</italic>
<sup>2</sup> measures the extent to which the model can explain the variability in the data, also known as the coefficient of determination. It ranges from 0 to 1. The values closer to 1 indicate an improved fit of the model to the data, and closer to 0 indicate an inferior fit.<disp-formula id="equ1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ2">
<mml:math id="m2">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 Data processing</title>
<p>In the analysis of pyrolysis characteristic parameters of tobacco leaf samples, it is necessary to confirm the reliability of the pyrolysis characteristic data, which relies on a good repeatability of thermogravimetric experiments. <xref ref-type="fig" rid="F1">Figure 1</xref> illustrates the DTG curves of three repeated experiments carried out on a particular sample, and it can be visually observed that there are no significant differences between the three experiments. In addition, the differences between the three experiments are described quantitatively using the Normalized Root Mean Squared Error (NRMSE). NRMSE between trial 1 and trial 2 is 1.85%, and NRMSE between trial 1 and trial 3 is 0.57%. It indicates that there is a good repeatability for the thermogravimetric experiments, which is sufficient to meet the experimental requirements.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Repeatability comparison chart.</p>
</caption>
<graphic xlink:href="fchem-12-1353745-g001.tif"/>
</fig>
<p>Since the temperature intervals of the original differential thermogravimetric data (DTG) are not equal, interpolation is required to obtain thermogravimetric data from different tobacco samples at the same temperature points. The temperature range of the interpolation is from 105&#xb0;C to 900&#xb0;C, with a temperature interval of 0.1&#xb0;C. The DTG curves of 157 samples are shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, with a total of 8436 points obtained for each sample. As can be seen from <xref ref-type="fig" rid="F2">Figure 2</xref>, in general, the DTG curves of the 157 tobacco samples were similar in shape. The differences in the DTG curves between the tobacco samples were mainly reflected before 500&#xb0;C, with less variability in the rest of the temperature range. This is because there are two distinct stages in the pyrolysis process of tobacco. The first stage, at 100&#xb0;C&#x2013;230&#xb0;C, mainly involves the degradation of monosaccharides, free amino acids and other thermally unstable, volatile components. The second stage, at 230&#xb0;C&#x2013;500&#xb0;C, mainly involves the pyrolysis of biological components such as hemicellulose, cellulose and lignin of tobacco species (<xref ref-type="bibr" rid="B2">Barontini et al., 2013</xref>; <xref ref-type="bibr" rid="B15">Ma et al., 2022</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>DTG curves of 157 samples.</p>
</caption>
<graphic xlink:href="fchem-12-1353745-g002.tif"/>
</fig>
<p>For each chemical index, the maximum and minimum values were determined, then the distribution range was divided into a number of intervals, the number of samples in each interval was counted, and the distribution probability was calculated to make a density plot of the distribution of the chemical content of tobacco samples. <xref ref-type="fig" rid="F3">Figure 3</xref> shows the distribution densities of 19 chemical constituents of 157 tobacco samples. The horizontal axis represents the interval of content values, and the vertical axis represents the probability density distribution. It can be seen from the figure that the distributions are not uniform for various chemical constituents.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Density distribution of 19 chemical components in 157 samples.</p>
</caption>
<graphic xlink:href="fchem-12-1353745-g003.tif"/>
</fig>
<p>The linear correlation coefficients among 19 chemical components of 157 tobacco samples are shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. The values in the heat map reflect the strength of the correlation between the chemical components (the higher the value, the stronger the correlation). It can be seen that the correlation coefficients between most components are very low, and correlation coefficients above 0.9 occur only among total sugar, reducing sugar, methylene chloride extract, solanesol, calcium and nonvolatile acid. These results suggest that each chemical component needs to be modelled independently because the dependence between different chemical components is minimal. In the end, we used all 19 items of chemical information as input features for machine learning.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Chemical components correlation coefficient matrix.</p>
</caption>
<graphic xlink:href="fchem-12-1353745-g004.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="results|discussion" id="s3">
<title>3 Results and discussion</title>
<sec id="s3-1">
<title>3.1 Performance comparison of different machine learning models</title>
<p>To explore quantitative analysis models between DTG data and chemical components, Partial Least Squares (PLS), Support Vector Regression (SVR), Gaussian Process Regression (GPR), Multiple Linear Regression (MPL), Random Forest (RF) and Shallow Neural Network (SNN) were used to establish fitting models for the representative chemical components of tobacco materials, including total alkaloids, reducing sugars, and total nitrogen. The regression performance of each model on the training set and test set is shown in <xref ref-type="table" rid="T1">Table 1</xref>. It can be observed that for the three chemical components, the <italic>R</italic>
<sup>2</sup> values of the MPL, RF and SNN models on the training sets and test sets are below 0.7, indicating poor fitting performance for these three models on the high-dimensional DTG data. Among them, the <italic>R</italic>
<sup>2</sup> values of MPL for the three chemical components are all less than zero, indicating that linear regression cannot fit DTG data effectively. On the other hand, the <italic>R</italic><sup>2</sup> values of the PLS, SVR and GPR models on the training set and test set are all above 0.7, indicating relatively good fitting and prediction performance. Therefore, PLS, SVR and GPR models are selected to build quantitative analysis models between DTG data and 19 chemical components for comparison in the following analysis.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Performance comparison of different models.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center"/>
<th rowspan="2" align="center">Components</th>
<th colspan="2" align="center">PLS</th>
<th colspan="2" align="center">SVR</th>
<th colspan="2" align="center">GPR</th>
<th colspan="2" align="center">MPL</th>
<th colspan="2" align="center">RF</th>
<th colspan="2" align="center">SNN</th>
</tr>
<tr>
<th align="center">
<italic>R</italic>
<sup>2</sup>
</th>
<th align="center">RMSE</th>
<th align="center">
<italic>R</italic>
<sup>2</sup>
</th>
<th align="center">RMSE</th>
<th align="center">
<italic>R</italic>
<sup>2</sup>
</th>
<th align="center">RMSE</th>
<th align="center">
<italic>R</italic>
<sup>2</sup>
</th>
<th align="center">RMSE</th>
<th align="center">
<italic>R</italic>
<sup>2</sup>
</th>
<th align="center">RMSE</th>
<th align="center">
<italic>R</italic>
<sup>2</sup>
</th>
<th align="center">RMSE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="center">Training set</td>
<td align="center">Total alkaloids %</td>
<td align="center">0.97</td>
<td align="center">0.098</td>
<td align="center">0.73</td>
<td align="center">0.334</td>
<td align="center">0.77</td>
<td align="center">0.306</td>
<td align="center">&#x2212;0.18</td>
<td align="center">0.615</td>
<td align="center">0.59</td>
<td align="center">0.3625</td>
<td align="center">0.46</td>
<td align="center">0.414</td>
</tr>
<tr>
<td align="center">Reducing sugar %</td>
<td align="center">0.95</td>
<td align="center">0.839</td>
<td align="center">0.76</td>
<td align="center">1.822</td>
<td align="center">0.78</td>
<td align="center">1.844</td>
<td align="center">&#x2212;0.87</td>
<td align="center">5.464</td>
<td align="center">0.67</td>
<td align="center">2.3032</td>
<td align="center">0.31</td>
<td align="center">3.324</td>
</tr>
<tr>
<td align="center">Total nitrogen %</td>
<td align="center">0.90</td>
<td align="center">0.090</td>
<td align="center">0.85</td>
<td align="center">0.110</td>
<td align="center">0.85</td>
<td align="center">0.11</td>
<td align="center">&#x2212;0.9</td>
<td align="center">0.381</td>
<td align="center">0.57</td>
<td align="center">0.1817</td>
<td align="center">0.30</td>
<td align="center">0.231</td>
</tr>
<tr>
<td rowspan="3" align="center">Test set</td>
<td align="center">Total alkaloids %</td>
<td align="center">0.87</td>
<td align="center">0.299</td>
<td align="center">0.71</td>
<td align="center">0.278</td>
<td align="center">0.76</td>
<td align="center">0.267</td>
<td align="center">&#x2212;1.24</td>
<td align="center">2.619</td>
<td align="center">0.41</td>
<td align="center">0.684</td>
<td align="center">0.33</td>
<td align="center">0.874</td>
</tr>
<tr>
<td align="center">Reducing sugar %</td>
<td align="center">0.93</td>
<td align="center">2.036</td>
<td align="center">0.71</td>
<td align="center">2.021</td>
<td align="center">0.73</td>
<td align="center">1.922</td>
<td align="center">&#x2212;3.52</td>
<td align="center">8.429</td>
<td align="center">0.53</td>
<td align="center">2.814</td>
<td align="center">0.18</td>
<td align="center">4.924</td>
</tr>
<tr>
<td align="center">Total nitrogen %</td>
<td align="center">0.88</td>
<td align="center">0.136</td>
<td align="center">0.81</td>
<td align="center">0.129</td>
<td align="center">0.80</td>
<td align="center">0.132</td>
<td align="center">&#x2212;2.67</td>
<td align="center">1.727</td>
<td align="center">0.48</td>
<td align="center">0.205</td>
<td align="center">0.21</td>
<td align="center">0.289</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-2">
<title>3.2 Comparison of fitting performance for different chemical components</title>
<p>It is crucial to select the number of latent variables in the PLS modeling process. The model will be overfitted when there are too many latent variables, while selecting too few latent variables will result in loss of sample information and insufficient model fitting. Therefore, the number of latent variables at which the cumulative contribution rate reached 90% was taken as the starting point in the experiment, and, also considering the root mean square error of cross-validation (RMSECV), the number of latent variables with a cumulative contribution rate of about 90% and a relatively small RMSECV was finally selected.</p>
<p>When establishing the SVR model, the cross-validation loss is used as the objective to find the optimal penalty parameters. Considering that the performance of the GPR model may differ between kernel functions, the squared exponential kernel (SE), exponential kernel (Exp) and rational quadratic kernel (RQ) were each used to build a model, and the average performance of the three models was taken as the output result of the GPR model.</p>
<p>The performance comparison of the PLS, SVR and GPR models is shown in <xref ref-type="table" rid="T2">Table 2</xref>. It can be observed that for the 19 components predicted, the PLS model has higher <italic>R</italic>
<sup>2</sup> values (0.76&#x2013;0.99) and most of them are above 0.85. Moreover, it has lower RMSE for most chemical components, indicating its optimal performance. For the 19 chemical components, the performance of the SVR and GPR models is similar, with only a few <italic>R</italic>
<sup>2</sup> values above 0.8 and most <italic>R</italic>
<sup>2</sup> values below 0.75, but some of their RMSE values are smaller than those of PLS. The <italic>R</italic>
<sup>2</sup> variation between the training sets and test sets of PLS is small, indicating that the model effectively balances fitting performance and generalization ability when selecting the number of latent variables.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Comparison results of performance parameters of each model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Components</th>
<th rowspan="2" align="center">Model</th>
<th colspan="2" align="center">Training set</th>
<th colspan="2" align="center">Test set</th>
<th rowspan="2" align="center">Latent variables</th>
</tr>
<tr>
<th align="center">
<italic>R</italic>
<sup>2</sup>
</th>
<th align="center">RMSE</th>
<th align="center">
<italic>R</italic>
<sup>2</sup>
</th>
<th align="center">RMSE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="center">Total alkaloids %</td>
<td align="center">PLS</td>
<td align="center">0.97</td>
<td align="center">0.098</td>
<td align="center">0.87</td>
<td align="center">0.299</td>
<td align="center">18</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.73</td>
<td align="center">0.334</td>
<td align="center">0.71</td>
<td align="center">0.278</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.77</td>
<td align="center">0.306</td>
<td align="center">0.76</td>
<td align="center">0.267</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Reducing sugar %</td>
<td align="center">PLS</td>
<td align="center">0.95</td>
<td align="center">0.839</td>
<td align="center">0.93</td>
<td align="center">2.036</td>
<td align="center">16</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.76</td>
<td align="center">1.822</td>
<td align="center">0.71</td>
<td align="center">2.021</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.76</td>
<td align="center">1.721</td>
<td align="center">0.73</td>
<td align="center">1.922</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Total sugar %</td>
<td align="center">PLS</td>
<td align="center">1.00</td>
<td align="center">0.328</td>
<td align="center">0.88</td>
<td align="center">1.743</td>
<td align="center">24</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.79</td>
<td align="center">2.028</td>
<td align="center">0.72</td>
<td align="center">2.249</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.80</td>
<td align="center">1.994</td>
<td align="center">0.75</td>
<td align="center">1.532</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Total nitrogen %</td>
<td align="center">PLS</td>
<td align="center">0.90</td>
<td align="center">0.090</td>
<td align="center">0.88</td>
<td align="center">0.136</td>
<td align="center">6</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.85</td>
<td align="center">0.110</td>
<td align="center">0.81</td>
<td align="center">0.129</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.85</td>
<td align="center">0.110</td>
<td align="center">0.80</td>
<td align="center">0.132</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Potassium %</td>
<td align="center">PLS</td>
<td align="center">0.89</td>
<td align="center">0.273</td>
<td align="center">0.86</td>
<td align="center">0.104</td>
<td align="center">8</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.68</td>
<td align="center">0.296</td>
<td align="center">0.66</td>
<td align="center">0.297</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.71</td>
<td align="center">0.280</td>
<td align="center">0.68</td>
<td align="center">0.207</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Chlorine %</td>
<td align="center">PLS</td>
<td align="center">1.00</td>
<td align="center">0.014</td>
<td align="center">0.89</td>
<td align="center">0.123</td>
<td align="center">18</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.53</td>
<td align="center">0.139</td>
<td align="center">0.48</td>
<td align="center">0.150</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.55</td>
<td align="center">0.136</td>
<td align="center">0.47</td>
<td align="center">0.150</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">pH</td>
<td align="center">PLS</td>
<td align="center">0.90</td>
<td align="center">0.044</td>
<td align="center">0.89</td>
<td align="center">0.083</td>
<td align="center">14</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.67</td>
<td align="center">0.079</td>
<td align="center">0.66</td>
<td align="center">0.077</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.70</td>
<td align="center">0.083</td>
<td align="center">0.68</td>
<td align="center">0.059</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Starch %</td>
<td align="center">PLS</td>
<td align="center">0.98</td>
<td align="center">0.152</td>
<td align="center">0.87</td>
<td align="center">0.633</td>
<td align="center">22</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.68</td>
<td align="center">0.716</td>
<td align="center">0.63</td>
<td align="center">0.707</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.65</td>
<td align="center">0.741</td>
<td align="center">0.63</td>
<td align="center">0.708</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Dichloromethane extract %</td>
<td align="center">PLS</td>
<td align="center">0.93</td>
<td align="center">0.216</td>
<td align="center">0.89</td>
<td align="center">0.488</td>
<td align="center">25</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.50</td>
<td align="center">1.234</td>
<td align="center">0.46</td>
<td align="center">1.624</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.47</td>
<td align="center">1.492</td>
<td align="center">0.43</td>
<td align="center">1.788</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Solanesol mg/g</td>
<td align="center">PLS</td>
<td align="center">0.97</td>
<td align="center">0.606</td>
<td align="center">0.90</td>
<td align="center">2.106</td>
<td align="center">16</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.53</td>
<td align="center">2.395</td>
<td align="center">0.49</td>
<td align="center">2.085</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.59</td>
<td align="center">2.239</td>
<td align="center">0.59</td>
<td align="center">1.634</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Sulfate mg/g</td>
<td align="center">PLS</td>
<td align="center">0.92</td>
<td align="center">1.208</td>
<td align="center">0.87</td>
<td align="center">2.516</td>
<td align="center">12</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.67</td>
<td align="center">2.289</td>
<td align="center">0.60</td>
<td align="center">2.202</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.79</td>
<td align="center">1.815</td>
<td align="center">0.78</td>
<td align="center">2.057</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Phosphate mg/g</td>
<td align="center">PLS</td>
<td align="center">0.90</td>
<td align="center">0.275</td>
<td align="center">0.84</td>
<td align="center">0.245</td>
<td align="center">10</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.61</td>
<td align="center">0.299</td>
<td align="center">0.58</td>
<td align="center">0.289</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.65</td>
<td align="center">0.281</td>
<td align="center">0.63</td>
<td align="center">0.187</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Magnesium %</td>
<td align="center">PLS</td>
<td align="center">0.99</td>
<td align="center">0.009</td>
<td align="center">0.78</td>
<td align="center">0.073</td>
<td align="center">26</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.44</td>
<td align="center">0.066</td>
<td align="center">0.38</td>
<td align="center">0.082</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.48</td>
<td align="center">0.063</td>
<td align="center">0.41</td>
<td align="center">0.076</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Calcium %</td>
<td align="center">PLS</td>
<td align="center">0.97</td>
<td align="center">0.119</td>
<td align="center">0.86</td>
<td align="center">0.312</td>
<td align="center">15</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.48</td>
<td align="center">0.402</td>
<td align="center">0.43</td>
<td align="center">0.494</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.55</td>
<td align="center">0.369</td>
<td align="center">0.49</td>
<td align="center">0.399</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Polyphenols</td>
<td align="center">PLS</td>
<td align="center">0.80</td>
<td align="center">2.634</td>
<td align="center">0.76</td>
<td align="center">2.554</td>
<td align="center">8</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.48</td>
<td align="center">2.944</td>
<td align="center">0.44</td>
<td align="center">2.972</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.36</td>
<td align="center">3.243</td>
<td align="center">0.28</td>
<td align="center">2.073</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Refractory acid</td>
<td align="center">PLS</td>
<td align="center">0.99</td>
<td align="center">2.214</td>
<td align="center">0.95</td>
<td align="center">7.702</td>
<td align="center">19</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.78</td>
<td align="center">9.231</td>
<td align="center">0.75</td>
<td align="center">7.828</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.81</td>
<td align="center">7.518</td>
<td align="center">0.79</td>
<td align="center">6.837</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Amino acids</td>
<td align="center">PLS</td>
<td align="center">1.00</td>
<td align="center">0.093</td>
<td align="center">0.99</td>
<td align="center">1.899</td>
<td align="center">30</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.75</td>
<td align="center">1.707</td>
<td align="center">0.72</td>
<td align="center">1.925</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.75</td>
<td align="center">1.717</td>
<td align="center">0.73</td>
<td align="center">1.253</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Amadori compounds</td>
<td align="center">PLS</td>
<td align="center">0.99</td>
<td align="center">0.425</td>
<td align="center">0.96</td>
<td align="center">1.680</td>
<td align="center">20</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.81</td>
<td align="center">1.645</td>
<td align="center">0.75</td>
<td align="center">1.864</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.78</td>
<td align="center">1.758</td>
<td align="center">0.76</td>
<td align="center">1.777</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td rowspan="3" align="center">Neophytadiene mg/g</td>
<td align="center">PLS</td>
<td align="center">0.90</td>
<td align="center">0.082</td>
<td align="center">0.78</td>
<td align="center">0.105</td>
<td align="center">11</td>
</tr>
<tr>
<td align="center">SVR</td>
<td align="center">0.42</td>
<td align="center">0.101</td>
<td align="center">0.33</td>
<td align="center">0.089</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">GPR</td>
<td align="center">0.37</td>
<td align="center">0.106</td>
<td align="center">0.34</td>
<td align="center">0.064</td>
<td align="center">&#x2014;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="fig" rid="F5">Figure 5</xref> shows the fitting performance of the three models for each chemical component in the training and test sets (only three components are shown; the details of the models for the other chemical components are presented in <xref ref-type="table" rid="T2">Table 2</xref>). The <italic>x</italic>-axis is the true value, the <italic>y</italic>-axis is the model prediction, and the black diagonal line is the best fit line. It can be seen that, for the three components shown, the predictions of the PLS model agree more closely with the ground truth than those of the other two models in both the training and test sets, lying closer to the black diagonal line. This indicates excellent fitting and prediction performance with high accuracy. In addition, it can be seen that under the three models, the difference in <italic>R</italic>
<sup>2</sup> and RMSE between the test set and the training set is not large, indicating that the models are not overfitted. The above discussion shows that, for the vast majority of components, the PLS model used can effectively characterize the complex relationship between DTG curves and the chemical composition of tobacco materials. The PLS model, with its higher accuracy, can be further used to investigate the relationship between the content of other components and the pyrolysis process.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>The prediction results of each model.</p>
</caption>
<graphic xlink:href="fchem-12-1353745-g005.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>3.3 Characteristic temperature interval analysis</title>
<p>PLS is a supervised multivariate statistical analysis method that combines the variability information of the dependent variable to extract latent variables. It can describe the degree and direction of the dependence of each latent variable on the original variables. The VIP (variable importance in the projection) scores can be obtained to evaluate the contribution and explanatory ability of each variable to the model by calculating the variable coefficients, model weights and residuals of PLS regression. A higher VIP score indicates a more important variable that has a greater impact on the overall model. Given the good performance of the PLS regression model, calculating the VIP scores of the PLS regression model for the DTG of tobacco materials and the content of chemical components can help identify and select temperature points that contribute significantly to the prediction of chemical component content. This allows for the identification of temperature ranges that influence the chemical component content during pyrolysis. Generally, a VIP score greater than 1 indicates that the variable has an important influence on the model.</p>
<p>The VIP score plot of the temperature points for chemical components is shown in <xref ref-type="fig" rid="F6">Figure 6</xref>, which shows the VIP score plots for total alkaloids, reducing sugars and total nitrogen. The <italic>x</italic>-axis represents the temperature points, starting from 105&#xb0;C. From the figure, it can be observed that the VIP scores of the temperature points for total alkaloids greater than 1 appeared within the range of 135&#xb0;C&#x2013;263&#xb0;C and 332&#xb0;C&#x2013;385&#xb0;C, and four distinct peaks appeared in the range of 135&#xb0;C&#x2013;263&#xb0;C. The VIP scores of the temperature points for total nitrogen greater than 1 appeared within the range of 153&#xb0;C&#x2013;246&#xb0;C and 260&#xb0;C&#x2013;399&#xb0;C. The VIP scores of the temperature points for reducing sugars greater than 1 appeared within the range of 150&#xb0;C&#x2013;390&#xb0;C, 510&#xb0;C&#x2013;520&#xb0;C, and 688&#xb0;C&#x2013;701&#xb0;C, and the peaks occurred near the temperature points of 135&#xb0;C, 215&#xb0;C, 345&#xb0;C, respectively. It indicates that the pyrolysis rate in these temperature ranges has a significant impact on the regression model for the corresponding component content, especially near the peaks.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>VIP score map of temperature points in components regression model.</p>
</caption>
<graphic xlink:href="fchem-12-1353745-g006.tif"/>
</fig>
<p>The pyrolysis temperature ranges with VIP scores greater than 1 for the 19 chemical components are shown in <xref ref-type="table" rid="T3">Table 3</xref>. This indicates that different temperature ranges have significant influences on the regression of different components of tobacco materials. The temperature ranges with VIP scores greater than 1 for chemical components are mostly within the range of 130&#xb0;C&#x2013;400&#xb0;C, and a few of them are above 400&#xb0;C. Moreover, the temperature ranges above 400&#xb0;C are relatively narrow, indicating that the temperature ranges that have the most impact on the regression of chemical components of tobacco materials are mostly below 400&#xb0;C. Combined with the sample DTG curves in <xref ref-type="fig" rid="F2">Figure 2</xref>, it can be seen that the differences in the DTG curves between samples are mostly reflected below 400&#xb0;C. This means that the DTG curves below 400&#xb0;C contain the main information, and the PLS regression model effectively captures the characteristics of the DTG curves.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Temperature ranges with a VIP score greater than 1 for the 19 components.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Chemical components</th>
<th align="center">Temperature range/&#xb0;C</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Total alkaloids</td>
<td align="center">150-390,510-520,688-701</td>
</tr>
<tr>
<td align="center">Reducing sugar</td>
<td align="center">153-246,260-399</td>
</tr>
<tr>
<td align="center">Total sugar</td>
<td align="center">149-235,248-369</td>
</tr>
<tr>
<td align="center">Total nitrogen</td>
<td align="center">135-263,332-385</td>
</tr>
<tr>
<td align="center">Potassium</td>
<td align="center">193-281,339-382</td>
</tr>
<tr>
<td align="center">Chlorine</td>
<td align="center">135-252,238-371</td>
</tr>
<tr>
<td align="center">pH</td>
<td align="center">168-367,675-708</td>
</tr>
<tr>
<td align="center">Starch</td>
<td align="center">175-235,290-372</td>
</tr>
<tr>
<td align="center">Dichloromethane extract</td>
<td align="center">162&#x2013;395</td>
</tr>
<tr>
<td align="center">Solanesol</td>
<td align="center">181-244,261-390</td>
</tr>
<tr>
<td align="center">Sulfate</td>
<td align="center">131-182,190-233,240-374,750-809</td>
</tr>
<tr>
<td align="center">Phosphate</td>
<td align="center">177-263,390-530</td>
</tr>
<tr>
<td align="center">Magnesium</td>
<td align="center">131&#x2013;530</td>
</tr>
<tr>
<td align="center">Calcium</td>
<td align="center">150-275,305-373,675-710</td>
</tr>
<tr>
<td align="center">Polyphenols</td>
<td align="center">129-387,747-772</td>
</tr>
<tr>
<td align="center">Refractory acid</td>
<td align="center">150-276,300-370,375-495,683-712</td>
</tr>
<tr>
<td align="center">Amino acids</td>
<td align="center">132-234,258-295,325-370</td>
</tr>
<tr>
<td align="center">Amadori compounds</td>
<td align="center">130-229,250-342,506-512</td>
</tr>
<tr>
<td align="center">Neophytadiene</td>
<td align="center">231-284,300-374</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To validate the effectiveness of the selected temperature ranges, the characteristic temperature ranges were used as independent variables to establish PLS models for total alkaloids, reducing sugars and total nitrogen. The results are shown in <xref ref-type="table" rid="T4">Table 4</xref>. Compared with the full temperature data as input, the regression performance of the models slightly decreased after the feature temperature range selection. The <italic>R</italic>
<sup>2</sup> reductions for the training and test sets of the three chemical components are all within about 0.1 (the largest, 0.12, occurs for the reducing sugar test set). For total alkaloids, the difference in <italic>R</italic>
<sup>2</sup> between the test set and training set decreased after feature selection, which may be due to overfitting in the original model. For reducing sugars and total nitrogen, the difference in <italic>R</italic>
<sup>2</sup> between the test set and training set increased after feature selection, indicating that valuable information in the independent variables was lost during the selection of the feature temperature ranges. Overall, the performance of the PLS models after feature temperature range selection did not change significantly compared with the original models, suggesting that the selected feature temperature ranges contain the main information of the sample DTG curves.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Characteristic temperature interval regression performance.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Components</th>
<th rowspan="2" align="center">Characteristic temperature range/&#xb0;C</th>
<th colspan="4" align="center">Training set</th>
<th colspan="4" align="center">Test set</th>
</tr>
<tr>
<th align="center">Before filtering <italic>R</italic>
<sup>2</sup>
</th>
<th align="center">After filtering <italic>R</italic>
<sup>2</sup>
</th>
<th align="center">Before filtering RMSE</th>
<th align="center">After filtering RMSE</th>
<th align="center">Before filtering <italic>R</italic>
<sup>2</sup>
</th>
<th align="center">After filtering <italic>R</italic>
<sup>2</sup>
</th>
<th align="center">Before filtering RMSE</th>
<th align="center">After filtering RMSE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Total alkaloids %</td>
<td align="center">150-390,510-520,688-701</td>
<td align="center">0.97</td>
<td align="center">0.89</td>
<td align="center">0.098</td>
<td align="center">0.148</td>
<td align="center">0.87</td>
<td align="center">0.82</td>
<td align="center">0.299</td>
<td align="center">0.362</td>
</tr>
<tr>
<td align="center">Reducing sugar %</td>
<td align="center">153-246,260-399</td>
<td align="center">0.95</td>
<td align="center">0.90</td>
<td align="center">0.839</td>
<td align="center">1.373</td>
<td align="center">0.93</td>
<td align="center">0.81</td>
<td align="center">2.036</td>
<td align="center">2.997</td>
</tr>
<tr>
<td align="center">Total nitrogen %</td>
<td align="center">135-263,332-385</td>
<td align="center">0.90</td>
<td align="center">0.84</td>
<td align="center">0.090</td>
<td align="center">0.159</td>
<td align="center">0.88</td>
<td align="center">0.79</td>
<td align="center">0.136</td>
<td align="center">0.207</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>It is generally believed that the range below 400&#xb0;C is the main pyrolysis temperature range of monosaccharides, oligosaccharides, small organic acids and other heat-unstable, volatile components, as well as cellulose (<xref ref-type="bibr" rid="B17">Strandberg et al., 2017</xref>). By correlating the characteristic temperature ranges of different chemical components with the pyrolysis reaction processes of tobacco materials, it is possible to reveal the potential synergistic, coupling and catalytic effects that may exist among various components during the pyrolysis process of tobacco materials. Furthermore, the chemical component content in tobacco materials is related to sensory quality. For example, reducing sugars and total sugars are significantly positively correlated with comfort, while total alkaloids and total nitrogen are significantly negatively correlated with sensory indicators. By selecting the pyrolysis characteristic temperature ranges of different component regression models, the pyrolysis parameters within these ranges can be used as references for the selection of tobacco materials in cigarette formulation.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>In this study, 157 kinds of tobacco materials from different growing regions, years and grades were used as samples, and a database for machine learning modeling was constructed from thermogravimetric and chemical analyses of the samples. Using the differential thermogravimetric curve as the independent variable and the chemical composition content as the dependent variable, quantitative analysis and prediction models were built using different machine learning methods to model the relationship between the weight loss rate given by the differential thermogravimetric curve and the chemical composition content. The regression performance of the different machine learning models was compared, and the temperature ranges with significant effects on the chemical component content were screened based on the VIP scores of the independent variables of the best performing PLS regression model. The results show that 1) the PLS, SVR and GPR models have relatively good regression performance on DTG data and chemical component contents for the three representative chemical components tested. 2) For the prediction of 19 chemical components, the PLS model showed the best fitting, prediction and generalization ability. In addition, the <italic>R</italic>
<sup>2</sup> values of the PLS model for most of the components were above 0.85, and the root mean square errors were small. 3) The temperature range that has a large influence on most components of tobacco materials is from 130&#xb0;C to 400&#xb0;C, and the characteristic temperature ranges of different chemical components are different.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>ZW: Conceptualization, Data curation, Formal Analysis, Investigation, Validation, Writing&#x2013;original draft. LW: Conceptualization, Methodology, Project administration, Resources, Supervision, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This work was supported by the Zhengzhou Tobacco Research Institute of CNTC Young Talent Lifting Project (252020CR0260).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>Authors HY and XZ were employed by China Tobacco Shandong Industrial Co., Ltd. Author ZG was employed by China Tobacco Guangdong Industrial Co., Ltd. Author YL was employed by China Tobacco Shandong Industrial Co., Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Balsora</surname>
<given-names>H. K.</given-names>
</name>
<name>
<surname>S</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Dua</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Joshi</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Kataria</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sharma</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Machine learning approach for the prediction of biomass pyrolysis kinetics from preliminary analysis</article-title>. <source>J. Environ. Chem. Eng.</source> <volume>10</volume>, <fpage>108025</fpage>. <pub-id pub-id-type="doi">10.1016/j.jece.2022.108025</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barontini</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Tugnoli</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cozzani</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Tetteh</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jarriault</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zinovik</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Volatile products formed in the thermal decomposition of a tobacco substrate</article-title>. <source>Ind. Eng. Chem. Res.</source> <volume>52</volume>, <fpage>14984</fpage>&#x2013;<lpage>14997</lpage>. <pub-id pub-id-type="doi">10.1021/ie401826u</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bi</surname>
<given-names>Y.-M.</given-names>
</name>
<name>
<surname>Chu</surname>
<given-names>G.-H.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.-Z.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>K.-L.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Ensemble partial least squares algorithm based on variable clustering for quantitative infrared spectrometric analysis</article-title>. <source>Chin. J. Anal. Chem.</source> <volume>43</volume>, <fpage>1086</fpage>&#x2013;<lpage>1091</lpage>. <pub-id pub-id-type="doi">10.1016/S1872-2040(15)60842-8</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>The applicability of different tobacco types to heated tobacco products</article-title>. <source>Industrial Crops Prod.</source> <volume>168</volume>, <fpage>113579</fpage>. <pub-id pub-id-type="doi">10.1016/j.indcrop.2021.113579</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Danieli</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Betina</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ademir</surname>
<given-names>J. Z.</given-names>
</name>
<name>
<surname>Heitor</surname>
<given-names>L. O.</given-names>
</name>
<name>
<surname>Francisco</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Andr&#xe9;</surname>
<given-names>L. C.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>The use of the artificial neural network (ANN) for modeling of thermogravimetric curves of tobacco stalk waste exposed to alkaline treatment</article-title>. <source>J. Nat. Fibers</source> <volume>19</volume> (<issue>15</issue>), <fpage>12119</fpage>&#x2013;<lpage>12128</lpage>. <pub-id pub-id-type="doi">10.1080/15440478.2022.2051670</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dobbelaere</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Plehiers</surname>
<given-names>P. P.</given-names>
</name>
<name>
<surname>Van De Vijver</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Stevens</surname>
<given-names>C. V.</given-names>
</name>
<name>
<surname>Van Geem</surname>
<given-names>K. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Machine learning in chemical engineering: strengths, weaknesses, opportunities, and threats</article-title>. <source>Engineering</source> <volume>7</volume>, <fpage>1201</fpage>&#x2013;<lpage>1211</lpage>. <pub-id pub-id-type="doi">10.1016/j.eng.2021.03.019</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Min</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Determination of 27 chemical constituents in Chinese southwest tobacco by FT-NIR spectroscopy</article-title>. <source>Industrial Crops Prod.</source> <volume>40</volume>, <fpage>21</fpage>&#x2013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1016/j.indcrop.2012.02.040</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Comparative investigation on thermal degradation of flue-cured tobacco with different particle sizes by a macro-thermogravimetric analyzer and their apparent kinetics based on distributed activation energy model</article-title>. <source>J. Therm. Anal. Calorim.</source> <volume>138</volume>, <fpage>3375</fpage>&#x2013;<lpage>3388</lpage>. <pub-id pub-id-type="doi">10.1007/s10973-019-08215-7</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Kong</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Tobacco fractionation and its effects on pyrolysis chemistry</article-title>. <source>J. Anal. Appl. Pyrolysis</source> <volume>167</volume>, <fpage>105650</fpage>. <pub-id pub-id-type="doi">10.1016/j.jaap.2022.105650</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heng</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Jian</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Macro thermogravimetric analysis and its application in identifying pyrolysis characteristics of different tobacco</article-title>. <source>Tob. Sci. Technol.</source> <volume>55</volume>, <fpage>58</fpage>&#x2013;<lpage>69</lpage>. <pub-id pub-id-type="doi">10.16135/j.issn1002-0861.2021.0732</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jordan</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Mitchell</surname>
<given-names>T. M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Machine learning: trends, perspectives, and prospects</article-title>. <source>Science</source> <volume>349</volume>, <fpage>255</fpage>&#x2013;<lpage>260</lpage>. <pub-id pub-id-type="doi">10.1126/science.aaa8415</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kr&#xfc;semann</surname>
<given-names>E. J. Z.</given-names>
</name>
<name>
<surname>Lasschuijt</surname>
<given-names>M. P.</given-names>
</name>
<name>
<surname>de Graaf</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>de Wijk</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Punter</surname>
<given-names>P. H.</given-names>
</name>
<name>
<surname>van Tiel</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Sensory analysis of characterising flavours: evaluating tobacco product odours using an expert panel</article-title>. <source>Tob. Control</source> <volume>28</volume>, <fpage>152</fpage>&#x2013;<lpage>160</lpage>. <pub-id pub-id-type="doi">10.1136/tobaccocontrol-2017-054152</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kurt</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Impacts of environmental variations on quality and chemical contents of oriental tobacco</article-title>. <source>Beitr&#xe4;ge zur Tabakforschung International/Contributions Tob. Res.</source> <volume>30</volume>, <fpage>50</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.2478/cttr-2021-0006</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Losso</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Cardini</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huber</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kappacher</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jakschitz</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Rainer</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Rapid differentiation and quality control of tobacco products using direct analysis in real time mass spectrometry and liquid chromatography mass spectrometry</article-title>. <source>Talanta</source> <volume>238</volume>, <fpage>123057</fpage>. <pub-id pub-id-type="doi">10.1016/j.talanta.2021.123057</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Thermogravimetric pyrolysis kinetics study of tobacco stem via multicomponent kinetic modeling, Asym2sig deconvolution and combined kinetics</article-title>. <source>Bioresour. Technol.</source> <volume>360</volume>, <fpage>127539</fpage>. <pub-id pub-id-type="doi">10.1016/j.biortech.2022.127539</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Quantitative analysis of routine chemical constituents of tobacco based on thermogravimetric analysis</article-title>. <source>ACS Omega</source> <volume>7</volume>. <pub-id pub-id-type="doi">10.21203/rs.3.rs-1258420/v1</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Strandberg</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Holmgren</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Brostr&#xf6;m</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Predicting fuel properties of biomass using thermogravimetry and multivariate data analysis</article-title>. <source>Fuel Process. Technol.</source> <volume>156</volume>, <fpage>107</fpage>&#x2013;<lpage>112</lpage>. <pub-id pub-id-type="doi">10.1016/j.fuproc.2016.10.021</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Pyrolysis products from industrial waste biomass based on a neural network model</article-title>. <source>J. Anal. Appl. Pyrolysis</source> <volume>120</volume>, <fpage>94</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1016/j.jaap.2016.04.013</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Climatic factors determine the yield and quality of Honghe flue-cured tobacco</article-title>. <source>Sci. Rep.</source> <volume>10</volume>, <fpage>19868</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-76919-0</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thielen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Klus</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>M&#xfc;ller</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Tobacco smoke: unraveling a controversial subject</article-title>. <source>Exp. Toxicol. Pathology</source> <volume>60</volume>, <fpage>141</fpage>&#x2013;<lpage>156</lpage>. <pub-id pub-id-type="doi">10.1016/j.etp.2008.01.014</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Bin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>On-line monitoring of the tobacco leaf composition during flue-curing by near-infrared spectroscopy and deep transfer learning</article-title>. <source>Anal. Lett.</source> <volume>55</volume>, <fpage>2089</fpage>&#x2013;<lpage>2107</lpage>. <pub-id pub-id-type="doi">10.1080/00032719.2022.2046021</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Xia</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>Evolutionary algorithms for optimization of tobacco leaf groups blending</article-title>,&#x201d; in <conf-name>2009 10th ACIS International Conference on Software Engineering, Artificial Intelligences, Networking and Parallel/Distributed Computing</conf-name>, <conf-loc>Daegu, Korea (South)</conf-loc>, <conf-date>May, 2009</conf-date>, <fpage>144</fpage>&#x2013;<lpage>148</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Auto-classification of biomass through characterization of their pyrolysis behaviors using thermogravimetric analysis with support vector machine algorithm: case study for tobacco</article-title>. <source>Biotechnol. Biofuels</source> <volume>14</volume>, <fpage>106</fpage>. <pub-id pub-id-type="doi">10.1186/s13068-021-01942-w</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cong</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jingxiu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Quantitative analysis of routine chemical constituents in tobacco by near-infrared spectroscopy and support vector machine</article-title>. <source>Spectrochimica Acta Part A Mol. Biomol. Spectrosc.</source> <volume>71</volume>, <fpage>1408</fpage>&#x2013;<lpage>1413</lpage>. <pub-id pub-id-type="doi">10.1016/j.saa.2008.04.020</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>