<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med.</journal-id>
<journal-title>Frontiers in Medicine</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med.</abbrev-journal-title>
<issn pub-type="epub">2296-858X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmed.2022.811890</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Medicine</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Preliminary prediction of semen quality based on modifiable lifestyle factors by using the XGBoost algorithm</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Zhou</surname> <given-names>Mingjuan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1700849/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Yao</surname> <given-names>Tianci</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Jian</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Hui</surname> <given-names>Hui</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Fan</surname> <given-names>Weimin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/491015/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Guan</surname> <given-names>Yunfeng</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="corresp" rid="c003"><sup>&#x0002A;</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhang</surname> <given-names>Aijun</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/657018/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Xu</surname> <given-names>Bufang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/611104/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Obstetrics and Gynecology, Ruijin Hospital, Shanghai Jiao Tong University School of Medicine</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Shanghai National Engineering Research Center of Digital Television Co., Ltd.</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Clinical Research Center, Ruijin Hospital, Shanghai Jiao Tong University School of Medicine</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Cooperative Medianet Innovation Center, Shanghai Jiao Tong University</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff>
<aff id="aff6"><sup>6</sup><institution>Department of Histo-Embryology, Genetics and Developmental Biology, Shanghai Key Laboratory of Reproductive Medicine, Shanghai Jiao Tong University School of Medicine</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Stefano Cianci, University of Messina, Italy</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Mohd Faizal Ahmad, National University of Malaysia, Malaysia; Yuewei Liu, Sun Yat-sen University, China; Geng An, Guangzhou Medical University, China</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Bufang Xu <email>bufangxu&#x00040;163.com</email></corresp>
<corresp id="c002">Aijun Zhang <email>zhaj1268&#x00040;163.com</email></corresp>
<corresp id="c003">Yunfeng Guan <email>yfguan69&#x00040;sjtu.edu.cn</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Obstetrics and Gynecological Surgery, a section of the journal Frontiers in Medicine</p></fn>
<fn fn-type="equal" id="fn002"><p>&#x02020;These authors have contributed equally to this work and share first authorship</p></fn></author-notes>
<pub-date pub-type="epub">
<day>13</day>
<month>09</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>9</volume>
<elocation-id>811890</elocation-id>
<history>
<date date-type="received">
<day>09</day>
<month>11</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>01</day>
<month>08</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2022 Zhou, Yao, Li, Hui, Fan, Guan, Zhang and Xu.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Zhou, Yao, Li, Hui, Fan, Guan, Zhang and Xu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Semen quality has decreased gradually in recent years, and lifestyle changes are among the primary causes for this issue. Thus far, the specific lifestyle factors affecting semen quality remain to be elucidated.</p>
</sec>
<sec>
<title>Materials and methods</title>
<p>In this study, data on the following factors were collected from 5,109 men examined at our reproductive medicine center: 10 lifestyle factors that potentially affect semen quality (smoking status, alcohol consumption, staying up late, sleeplessness, consumption of pungent food, intensity of sports activity, sedentary lifestyle, working in hot conditions, sauna use in the last 3 months, and exposure to radioactivity); general factors including age, abstinence period, and season of semen examination; and comprehensive semen parameters [semen volume, sperm concentration, progressive and total sperm motility, sperm morphology, and DNA fragmentation index (DFI)]. Then, machine learning with the XGBoost algorithm was applied to establish a primary prediction model by using the collected data. Furthermore, the accuracy of the model was verified <italic>via</italic> multiple logistic regression following <italic>k</italic>-fold cross-validation analyses.</p>
</sec>
<sec>
<title>Results</title>
<p>The results indicated that for semen volume, sperm concentration, progressive and total sperm motility, and DFI, the area under the curve (AUC) values ranged from 0.648 to 0.697, while the AUC for sperm morphology was only 0.506. Among the 13 factors, smoking status was the major factor affecting semen volume, sperm concentration, and progressive and total sperm motility. Age was the most important factor affecting DFI. Logistic regression combined with cross-validation analysis revealed similar results. Furthermore, it showed that heavy smoking (&#x0003E;20 cigarettes/day) had an overall negative effect on semen volume, sperm concentration, and progressive and total sperm motility (OR = 4.69, 6.97, 11.16, and 10.35, respectively), while age of &#x0003E;35 years was associated with increased DFI (OR = 5.47).</p>
</sec>
<sec>
<title>Conclusion</title>
<p>The preliminary lifestyle-based model developed for semen quality prediction by using the XGBoost algorithm showed potential for clinical application and further optimization with larger training datasets.</p>
</sec></abstract>
<kwd-group>
<kwd>lifestyles</kwd>
<kwd>semen quality</kwd>
<kwd>artificial intelligence</kwd>
<kwd>machine learning</kwd>
<kwd>extreme gradient boosting (XGBoost)</kwd>
</kwd-group>
<contract-num rid="cn001">82071712</contract-num>
<contract-num rid="cn001">81771656</contract-num>
<contract-num rid="cn001">81873857</contract-num>
<contract-num rid="cn001">82071596</contract-num>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content></contract-sponsor>
<counts>
<fig-count count="8"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="48"/>
<page-count count="15"/>
<word-count count="7028"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Semen quality is an important determinant of male fertility (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). In recent years, the semen quality has decreased, and this adverse trend has aroused widespread concern (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B4">4</xref>). Many factors have been reported to affect semen quality, including demographic characteristics such as age and body mass; diseases such as endocrine or genetic problems, prostate disorders, seminal tract obstruction, and oncological diseases; environmental factors such as temperature changes, pollution, and electromagnetic radiation; and lifestyle factors such as smoking, alcohol intake, and staying up late (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B5">5</xref>&#x02013;<xref ref-type="bibr" rid="B10">10</xref>). Extensive research has indicated that unhealthy lifestyles are among the most important factors accounting for male reproductive disorders and decreased semen quality (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B11">11</xref>). However, the specific lifestyle factors affecting semen quality remain to be elucidated. Furthermore, undertaking the relevant research required for this purpose is difficult because of lifestyle complexity (characterized by factors such as frequent changes or the involvement of various characteristics and confounding variables).</p>
<p>Machine learning, a branch of artificial intelligence (AI), is suitable for dealing with flexible relationships among predictor variables and outcomes in large datasets (<xref ref-type="bibr" rid="B12">12</xref>). The application of machine learning in multiple fields of medicine could help develop disease prediction models (<xref ref-type="bibr" rid="B13">13</xref>&#x02013;<xref ref-type="bibr" rid="B15">15</xref>), and many studies have applied this approach to the analysis of semen parameters, such as morphology (<xref ref-type="bibr" rid="B16">16</xref>). However, there are few studies involving the application of AI in the prediction of the impact of lifestyles on semen quality. To our knowledge, thus far, only 2 small-sample (<italic>n</italic> = 100) studies have revealed the effects of lifestyle variations on semen parameters (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>). Furthermore, the volunteers recruited in these studies were young (age between 18 and 36 years) and the semen quality parameters included were limited or ambiguous. Therefore, further comprehensive and extensive research is warranted. Hence, in this study, XGBoost, a decision-tree-based machine learning algorithm, was applied to analyze the association between semen quality characteristics and lifestyle factors by using data collected from 5,109 men examined in our reproductive medical center, so as to develop a preliminary model for semen quality prediction. Furthermore, the accuracy of the model was verified <italic>via</italic> multiple logistic regression analyses to determine the value of further study.</p>
</sec>
<sec sec-type="materials and methods" id="s2">
<title>Materials and methods</title>
<sec>
<title>Study design</title>
<p>This study was approved by the Ethics Committee of Ruijin Hospital, School of Medicine, Shanghai Jiao Tong University (No. 2019-185), and all participants recruited signed informed consent forms. As shown in the patient recruitment flowchart (<xref ref-type="fig" rid="F1">Figure 1</xref>), from October 2019 to September 2021, 6,951 men examined in our center were recruited. Participants with a BMI &#x0003C; 32 and without chromosome abnormalities were included. The exclusion criteria were as follows: prostatic inflammation and organic injury, seminal tract obstruction, cancer, hypospadias, low testosterone levels, varicocele, mumps, cryptorchidism, diabetes, microdeletion of the Y chromosome azoospermia factor (AZF), hyperlipidemia, hypertension, and sexually transmitted diseases. Ultimately, 6,388 men were included after dropping 563 patients who met the exclusion criteria. According to the different evaluations intended, the routine seminal assay including semen volume, sperm concentration, and sperm motility, was performed for all participants, while sperm morphology tests and DNA fragmentation index (DFI) examinations were performed in 3,018 and 2,209 participants, respectively. Each participant completed a baseline questionnaire before the semen analysis, and cases with missing questionnaire responses were excluded. Thus, the final dataset included 5,109 men whose semen volume, sperm concentration, and sperm motility were analyzed. Furthermore, sperm morphology and DFI analyses were performed in 2,511 and 1,812 participants, respectively.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Flow chart of the study population. A total of 5,109 males were included in this study; all of these participants underwent a routine seminal assay while some also underwent sperm morphology and DNA fragmentation index assays according to the different inspection purposes.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-09-811890-g0001.tif"/>
</fig>
</sec>
<sec>
<title>Questionnaire variables</title>
<p>The questionnaire comprised 13 items including 10 pertaining to habitual lifestyles and three general conditions including age, abstinence period, and date of questionnaire completion; the details are listed in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>. Sleeplessness and intensity of sports activity were analyzed using the Insomnia Severity Index (<xref ref-type="supplementary-material" rid="SM2">Supplementary Table 2</xref>) and modified Physical Activity Questionnaire (<xref ref-type="supplementary-material" rid="SM3">Supplementary Table 3</xref>) (<xref ref-type="bibr" rid="B19">19</xref>), respectively.</p>
</sec>
<sec>
<title>Assessment of semen quality</title>
<p>Semen samples were collected in a sterile plastic container by asking the participants to masturbate. The participants were asked to void urine and wash their hands and external genitalia before masturbating to provide the sample. The sample collected was placed in a water bath maintained at 37&#x000B0;C for 30&#x02013;60 min for liquefaction. Semen volume was measured by weighing, assuming a semen density of 1.0 g/ml. Sperm concentration (spermatozoa N/mL) and motility (%) were evaluated using a computer-aided sperm analysis system. DFI was determined by flow cytometry after staining with acridine orange, and sperm morphology was investigated using the Diff-Quick staining method. Reference values from the World Health Organization semen analysis manual were used to assess semen characteristics (<xref ref-type="bibr" rid="B20">20</xref>), and values below the lower threshold provided in the WHO manual were defined as abnormal. In addition, a DFI threshold of 30% was applied to classify participants into normal (DFI &#x0003C; 30%) and abnormal (DFI &#x02265; 30%) groups according to a previously published article (<xref ref-type="bibr" rid="B21">21</xref>).</p>
</sec>
<sec>
<title>AI and machine learning</title>
<p>The algorithm used in this study was extreme gradient boosting (XGBoost). The feature importance was calculated by the gain method from the XGBoost Python library, which averages the reduction in training loss achieved by each split that uses the feature. The input variables were the information collected from the questionnaire of each patient, and the output variables were the semen quality parameters. The input variables were considered categorical variables (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>), and the output variables were considered dichotomous variables according to the criterion described above. The six semen quality parameters were independent indicators; the XGBoost model was developed using different hyperparameters, separately, to improve the accuracy of the algorithm.</p>
<p>Cross-validation was performed to adjust the parameters. First, a relatively high &#x0201C;learning_rate&#x0201D; was used and the optimum &#x0201C;n_estimators&#x0201D; was selected for this &#x0201C;learning_rate&#x0201D;. Secondly, the parameters &#x0201C;max_depth&#x0201D; and &#x0201C;min_child_weight&#x0201D; were adjusted for the selected &#x0201C;learning_rate&#x0201D; and &#x0201C;n_estimators.&#x0201D; Owing to the class imbalance in the dataset, the training dataset was oversampled, and the &#x0201C;scale_pos_weight&#x0201D; was always equal to 1. Then, the learning rate was reduced. Next, &#x0201C;max_depth&#x0201D; was adjusted to simplify the XGBoost model according to the results obtained for the test dataset. The clean dataset used for XGBoost was randomly split into training and test datasets in a ratio of 70:30. Hyperparameter details are described in <xref ref-type="table" rid="T1">Table 1</xref>. Lastly, <italic>k</italic>-fold cross-validation with <italic>k</italic> = 10 was performed to evaluate machine learning models.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Distribution of male participants whose data were used for machine learning and the hyperparameters for XGBoost.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Sperm quality parameters</bold></th>
<th valign="top" align="center"><bold>Total no</bold>.</th>
<th valign="top" align="center"><bold>Train set no</bold>.</th>
<th valign="top" align="center"><bold>Test set no</bold>.</th>
<th valign="top" align="center"><bold>Learning rate</bold></th>
<th valign="top" align="center"><bold><italic>N</italic> estimators</bold></th>
<th valign="top" align="center"><bold>Max depth</bold></th>
<th valign="top" align="center"><bold>Min_child_weight</bold></th>
<th valign="top" align="center"><bold>Scale_pos_weight</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Semen volume</td>
<td valign="top" align="center">5,109</td>
<td valign="top" align="center">3,576</td>
<td valign="top" align="center">1,533</td>
<td valign="top" align="center">0.01</td>
<td valign="top" align="center">600</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Sperm concentration</td>
<td valign="top" align="center">5,109</td>
<td valign="top" align="center">3,576</td>
<td valign="top" align="center">1,533</td>
<td valign="top" align="center">0.01</td>
<td valign="top" align="center">750</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Progressive motility</td>
<td valign="top" align="center">5,109</td>
<td valign="top" align="center">3,576</td>
<td valign="top" align="center">1,533</td>
<td valign="top" align="center">0.01</td>
<td valign="top" align="center">600</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Total motility</td>
<td valign="top" align="center">5,109</td>
<td valign="top" align="center">3,576</td>
<td valign="top" align="center">1,533</td>
<td valign="top" align="center">0.01</td>
<td valign="top" align="center">600</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Sperm morphology</td>
<td valign="top" align="center">2,511</td>
<td valign="top" align="center">1,757</td>
<td valign="top" align="center">754</td>
<td valign="top" align="center">0.01</td>
<td valign="top" align="center">300</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">DFI</td>
<td valign="top" align="center">1,812</td>
<td valign="top" align="center">1,268</td>
<td valign="top" align="center">544</td>
<td valign="top" align="center">0.01</td>
<td valign="top" align="center">300</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>Statistical analysis and logistic regression analysis</title>
<p>Descriptive statistics were used to summarize general demographics. The correlations among 13 questionnaire items were evaluated by Pearson&#x00027;s correlation coefficients. For continuous variables, data are expressed as mean &#x000B1; SD for normally distributed data or median (Interquartile range, IQR) values for non-parametric data. For categorical variables, data are expressed as percentages.</p>
<p>Univariate and multivariable logistic regression analyses were used to identify the factors related to semen quality. For each independent variable, odds ratios (ORs) and 95% confidence intervals (CIs) were estimated. Collinearity analyses were performed before the logistic regression analysis, and the model&#x00027;s goodness-of-fit was graphically evaluated (ROC curves). The response variables were categorized per the method used for the XGBoost algorithm, and stepwise regression was applied for all multivariable logistic regression analyses. Moreover, <italic>k</italic>-fold cross-validation with <italic>k</italic> = 10 was performed to evaluate the accuracy of the model.</p>
<p>The univariate and multivariable logistic regression analyses were performed with SAS version 9.4 (SAS Institute, Cary, NC, USA), and <italic>k</italic>-fold cross-validation was performed with <italic>k</italic> = 10 by using the package for R (version 4.1.2). Other statistical analyses were performed using SPSS 23.0. <italic>P</italic> &#x0003C; 0.05 was considered statistically significant.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec>
<title>General information collected using the questionnaire</title>
<p><xref ref-type="fig" rid="F2">Figure 2</xref> shows the proportions of participants corresponding to the subgroups for the following questionnaire items: (1) season of semen examination; (2) age; (3) abstinence period; (4) smoking status; (5) alcohol consumption; (6) staying up late; (7) sleeplessness; (8) consumption of pungent food; (9) intensity of sports activity; (10) sedentary lifestyle; (11) work in hot conditions; (12) sauna use in the last 3 months; and (13) exposure to radioactivity.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Information regarding the general and lifestyle characteristics of study participants.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-09-811890-g0002.tif"/>
</fig>
</sec>
<sec>
<title>Semen quality among study participants</title>
<p>Among the 5,109 males, the median semen volume, sperm concentration, total sperm count, rapid progressive motility of the sperm, progressive motility of the sperm, and total sperm motility were 3.3 ml (95% CI: 3.40&#x02013;3.49 ml), 68.1 &#x000D7; 10<sup>6</sup>/ml (95% CI: 79.26&#x02013;82.84 &#x000D7; 10<sup>6</sup>/ml), 214.5 &#x000D7; 10<sup>6</sup> (95% CI: 262.28&#x02013;275.23 &#x000D7; 10<sup>6</sup>), 23.0% (95% CI: 22.76&#x02013;23.50%), 47.8% (95% CI: 45.50&#x02013;46.74%), and 60.4% (95% CI: 56.46&#x02013;57.85%), respectively. The median normal sperm morphology among 2,511 men was 6.0% (95% CI: 6.24&#x02013;6.51%), and the median DFI of 1,812 men was 14.4% (95% CI: 17.29&#x02013;18.39%). In addition, 18.2% of the participants showed abnormal sperm morphology (morphologically normal forms, &#x0003C;4.0%, <italic>n</italic> = 2,511) and 13.9% had high DFI (&#x02265;30%, <italic>n</italic> = 1,812).</p>
</sec>
<sec>
<title>Risk factors affecting semen volume</title>
<p>We trained XGBoost with the input of the 13 items and achieved 60.7&#x02013;70.3% accuracy, 55.4&#x02013;72.5% sensitivity, and 39.9&#x02013;70.4% specificity for the test set (<xref ref-type="table" rid="T2">Table 2</xref>).</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Outcomes of machine learning using XGBoost.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Sperm quality parameters</bold></th>
<th valign="top" align="center"><bold>Classification accuracy</bold></th>
<th valign="top" align="center"><bold>True negative</bold></th>
<th valign="top" align="center"><bold>False positive</bold></th>
<th valign="top" align="center"><bold>False negative</bold></th>
<th valign="top" align="center"><bold>True positive</bold></th>
<th valign="top" align="center"><bold>Sensitivity</bold></th>
<th valign="top" align="center"><bold>Specificity</bold></th>
<th valign="top" align="center"><bold>Positive predictive value</bold></th>
<th valign="top" align="center"><bold>Negative predictive value</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Semen volume</td>
<td valign="top" align="center">0.7025</td>
<td valign="top" align="center">63</td>
<td valign="top" align="center">71</td>
<td valign="top" align="center">385</td>
<td valign="top" align="center">1,014</td>
<td valign="top" align="center">0.7248</td>
<td valign="top" align="center">0.4701</td>
<td valign="top" align="center">0.9346</td>
<td valign="top" align="center">0.1406</td>
</tr>
<tr>
<td valign="top" align="left">Sperm concentration</td>
<td valign="top" align="center">0.6758</td>
<td valign="top" align="center">94</td>
<td valign="top" align="center">87</td>
<td valign="top" align="center">410</td>
<td valign="top" align="center">942</td>
<td valign="top" align="center">0.6967</td>
<td valign="top" align="center">0.5193</td>
<td valign="top" align="center">0.9155</td>
<td valign="top" align="center">0.1865</td>
</tr>
<tr>
<td valign="top" align="left">Progressive motility</td>
<td valign="top" align="center">0.6282</td>
<td valign="top" align="center">269</td>
<td valign="top" align="center">147</td>
<td valign="top" align="center">423</td>
<td valign="top" align="center">694</td>
<td valign="top" align="center">0.6213</td>
<td valign="top" align="center">0.6466</td>
<td valign="top" align="center">0.8252</td>
<td valign="top" align="center">0.3887</td>
</tr>
<tr>
<td valign="top" align="left">Total motility</td>
<td valign="top" align="center">0.6067</td>
<td valign="top" align="center">218</td>
<td valign="top" align="center">157</td>
<td valign="top" align="center">446</td>
<td valign="top" align="center">712</td>
<td valign="top" align="center">0.6149</td>
<td valign="top" align="center">0.5813</td>
<td valign="top" align="center">0.8193</td>
<td valign="top" align="center">0.3283</td>
</tr>
<tr>
<td valign="top" align="left">DFI</td>
<td valign="top" align="center">0.6838</td>
<td valign="top" align="center">331</td>
<td valign="top" align="center">139</td>
<td valign="top" align="center">33</td>
<td valign="top" align="center">41</td>
<td valign="top" align="center">0.5541</td>
<td valign="top" align="center">0.7043</td>
<td valign="top" align="center">0.2278</td>
<td valign="top" align="center">0.9093</td>
</tr>
<tr>
<td valign="top" align="left">Sperm morphology</td>
<td valign="top" align="center">0.6167</td>
<td valign="top" align="center">55</td>
<td valign="top" align="center">83</td>
<td valign="top" align="center">206</td>
<td valign="top" align="center">410</td>
<td valign="top" align="center">0.6656</td>
<td valign="top" align="center">0.3986</td>
<td valign="top" align="center">0.8316</td>
<td valign="top" align="center">0.2107</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The AUC of the XGBoost model for semen volume was 0.648 (<xref ref-type="fig" rid="F3">Figure 3A</xref>) and the following cross-validation showed that the AUC of the model was 0.617. The feature importance plotted <italic>via</italic> XGBoost showed that the maximum score was for smoking status followed by abstinence period and staying up late (<xref ref-type="fig" rid="F3">Figure 3B</xref>). Logistic regression analyses (<xref ref-type="fig" rid="F3">Figure 3C</xref>) revealed that smoking status, abstinence period, sedentary lifestyle, and age were predictive markers of semen volume. The AUC of the combined markers (AUC = 0.655) was higher than that of the individual markers (AUC = 0.465, 0.563, 0.523, and 0.457, respectively), and the following cross-validation based on the multivariate regression analysis showed that the AUC of the model was 0.539. The maximum odds ratio was related to smoking status (OR = 4.69), indicating it to be the most important predictor (<xref ref-type="supplementary-material" rid="SM4">Supplementary Table 4</xref>). Abstinence period, the second most important factor as revealed by XGBoost, was significantly associated with semen volume in the logistic regression analysis (<xref ref-type="supplementary-material" rid="SM4">Supplementary Table 4</xref>). Besides, as shown in <xref ref-type="fig" rid="F3">Figure 3D</xref>, the OR per the regression analysis indicated that men who smoked more than 20 cigarettes/day were more likely to have a lower semen volume (OR: 4.69, 95% CI: 3.39&#x02013;6.49, <italic>P</italic> &#x0003C; 0.001). However, males who smoked &#x0003C;10 cigarettes/day were less likely to have a lower semen volume (OR: 0.67, 95% CI: 0.48&#x02013;0.93, <italic>P</italic> &#x0003C; 0.05) than non-smokers. 
Men who practiced abstinence for more than 7 days or had a sedentary lifestyle (&#x02265;5 h/day) were less likely to have a lower semen volume (OR: 0.63, 95% CI: 0.46&#x02013;0.87, <italic>P</italic> &#x0003C; 0.01 and OR: 0.81, 95% CI: 0.65&#x02013;1.00, <italic>P</italic> &#x0003C; 0.05, respectively).</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>XGBoost and logistic regression analysis of the risk factors for semen volume. The ROC curve <bold>(A)</bold> and feature importance <bold>(B)</bold> analyzed by XGBoost and the ROC curve <bold>(C)</bold> and forest diagram showing significant risk factors <bold>(D)</bold> analyzed by logistic regression.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-09-811890-g0003.tif"/>
</fig>
</sec>
<sec>
<title>Risk factors affecting sperm concentration</title>
<p>The AUC of the XGBoost model for sperm concentration was 0.661 (<xref ref-type="fig" rid="F4">Figure 4A</xref>), and the cross-validation showed that the AUC of the model was 0.674. The feature importance plotted using XGBoost showed that the maximum importance score was for smoking status, followed by age and season of semen examination (<xref ref-type="fig" rid="F4">Figure 4B</xref>). The AUCs of the logistic regression analyses (<xref ref-type="fig" rid="F4">Figure 4C</xref>) revealed that smoking status, age, intensity of sports activity, and consumption of pungent food were predictive markers of sperm concentration. The AUC of the combined marker (AUC = 0.680) was higher than those of individual markers (AUC = 0.457, 0.540, 0.519, and 0.489, respectively), and the cross-validation based on the multivariate regression analysis showed that the AUC of the model was 0.547. The maximum odds ratio was observed for smoking status (OR = 6.97), indicating it is the most important predictor (<xref ref-type="supplementary-material" rid="SM5">Supplementary Table 5</xref>). Age, the second-most important factor revealed by XGBoost, also showed significant association with sperm concentration <italic>via</italic> logistic regression assay (<xref ref-type="supplementary-material" rid="SM5">Supplementary Table 5</xref>). Besides, as shown in <xref ref-type="fig" rid="F4">Figure 4D</xref>, males who smoked more than 20 cigarettes/day were more likely to have lower sperm concentrations than non-smokers (OR: 6.97, 95% CI: 5.18&#x02013;9.37, <italic>P</italic> &#x0003C; 0.001), but smokers were less likely to have lower sperm concentrations than non-smokers when they smoked &#x0003C; 10 cigarettes/day (OR: 0.13, 95% CI: 0.07&#x02013;0.22, <italic>P</italic> &#x0003C; 0.001). Older men (&#x0003E;35 years) were less likely to have lower sperm density (OR: 0.72, 95% CI: 0.57&#x02013;0.91, <italic>P</italic> &#x0003C; 0.01) than younger men (&#x0003C;30 years).</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>XGBoost and logistic regression analysis of the risk factors for sperm concentration. The ROC curve <bold>(A)</bold> and feature importance <bold>(B)</bold> analyzed by XGBoost and the ROC curve <bold>(C)</bold> and forest diagram showing significant risk factors <bold>(D)</bold> analyzed by logistic regression.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-09-811890-g0004.tif"/>
</fig>
</sec>
<sec>
<title>Risk factors affecting progressive sperm motility</title>
<p>The AUC of the XGBoost model for progressive sperm motility was 0.697 (<xref ref-type="fig" rid="F5">Figure 5A</xref>), and the cross-validation showed that the AUC of the model was 0.698. The feature importance plotted using XGBoost showed that smoking status was the most important factor, followed by abstinence period and alcohol consumption (<xref ref-type="fig" rid="F5">Figure 5B</xref>). The AUCs of the logistic regression analyses (<xref ref-type="fig" rid="F5">Figure 5C</xref>) revealed that smoking status, abstinence period, alcohol consumption, age, exposure to radioactivity, and working in hot conditions were predictive markers of progressive sperm motility. The AUC of the combined marker (AUC = 0.705) was slightly higher than that of other markers, and the cross-validation based on the multivariate regression analysis showed that the AUC of the model was 0.696, which was similar to that of XGBoost (AUC = 0.697).</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>XGBoost and logistic regression analysis of the risk factors for progressive sperm motility. The ROC curve <bold>(A)</bold> and the feature importance <bold>(B)</bold> analyzed by XGBoost and the ROC curves <bold>(C)</bold> and forest diagram showing significant risk factors <bold>(D)</bold> analyzed by logistic regression.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-09-811890-g0005.tif"/>
</fig>
<p>The top-two odds ratios were observed for smoking status and abstinence period (OR = 11.16 and 2.05), indicating their importance in predictions (<xref ref-type="supplementary-material" rid="SM6">Supplementary Table 6</xref>). Alcohol consumption, which was identified as the third-most important by XGBoost, also showed a significant association with progressive sperm motility in the logistic regression assay (<xref ref-type="supplementary-material" rid="SM6">Supplementary Table 6</xref>).</p>
<p>Moreover, as shown in <xref ref-type="fig" rid="F5">Figure 5D</xref> and <xref ref-type="supplementary-material" rid="SM6">Supplementary Table 6</xref>, males who smoked more than 20 cigarettes/day were more likely to have lower progressive sperm motility (OR: 11.16, 95% CI: 7.82&#x02013;15.93, <italic>P</italic> &#x0003C; 0.001) than non-smokers, but smokers were less likely to have lower progressive sperm motility than non-smokers when they smoked &#x0003C;10 cigarettes/day (OR: 0.07, 95% CI: 0.05&#x02013;0.11, <italic>P</italic> &#x0003C; 0.001). Males who maintained abstinence for more than 7 days were more likely to show lower progressive sperm motility (OR: 2.05, 95% CI: 1.63&#x02013;2.57, <italic>P</italic> &#x0003C; 0.001).</p>
</sec>
<sec>
<title>Risk factors affecting total sperm motility</title>
<p>The AUC of the XGBoost model for total sperm motility was 0.660 (<xref ref-type="fig" rid="F6">Figure 6A</xref>), and the cross-validation showed that the AUC of the model was 0.686. The feature importance plotted <italic>via</italic> XGBoost showed that smoking status played the most important part, followed by working in hot conditions and abstinence period (<xref ref-type="fig" rid="F6">Figure 6B</xref>). The AUCs of the logistic regression analyses (<xref ref-type="fig" rid="F6">Figure 6C</xref>) revealed that smoking status, working in hot conditions, abstinence period, season of semen examination, alcohol consumption, consumption of pungent food, age, and exposure to radioactivity were predictive markers of total sperm motility. The AUC of the combined marker (AUC = 0.700) was slightly higher than that of the other markers, and the cross-validation based on the multivariate regression analysis showed that the AUC of the model was 0.749. The maximum odds ratio was observed for smoking status (OR = 10.35), indicating it was the most important predictor (<xref ref-type="supplementary-material" rid="SM7">Supplementary Table 7</xref>). Moreover, working in hot conditions and abstinence period, two of the top-three important factors revealed by XGBoost, also significantly affected total sperm motility in the regression analysis (<xref ref-type="supplementary-material" rid="SM7">Supplementary Table 7</xref>).</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>XGBoost and logistic regression analysis of the risk factors for total sperm motility. The ROC curve <bold>(A)</bold> and the feature importance <bold>(B)</bold> analyzed by XGBoost and the ROC curve <bold>(C)</bold> and forest diagram showing significant risk factors <bold>(D)</bold> analyzed by logistic regression.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-09-811890-g0006.tif"/>
</fig>
<p>As shown in <xref ref-type="fig" rid="F6">Figure 6D</xref> and <xref ref-type="supplementary-material" rid="SM7">Supplementary Table 7</xref>, males who smoked more than 20 cigarettes/day were more likely to have lower total sperm motility than non-smokers (OR: 10.35, 95% CI: 7.35&#x02013;14.56, <italic>P</italic> &#x0003C; 0.001), but smokers were less likely to have a lower total sperm motility than non-smokers when they smoked &#x0003C;10 cigarettes/day (OR: 0.06, 95% CI: 0.03&#x02013;0.10, <italic>P</italic> &#x0003C; 0.001). Males who worked under hot conditions were more likely to show low total sperm motility (OR: 1.63, 95% CI: 1.20&#x02013;2.21, <italic>P</italic> &#x0003C; 0.05). Moreover, males who maintained abstinence for more than 7 days were more likely to have lower total sperm motility (OR: 1.72, 95% CI: 1.37&#x02013;2.17, <italic>P</italic> &#x0003C; 0.001).</p>
</sec>
<sec>
<title>Risk factors affecting sperm morphology</title>
<p>The AUC of the XGBoost model for sperm morphology was only 0.506 (<xref ref-type="fig" rid="F7">Figure 7A</xref>), and the cross-validation showed that the AUC of the model was 0.520. The feature importance plot created using XGBoost showed that smoking status was the most important factor (<xref ref-type="fig" rid="F7">Figure 7B</xref>). The AUCs of the logistic regression analyses (<xref ref-type="fig" rid="F7">Figure 7C</xref>) revealed that smoking status was a predictive index for sperm morphology with a poor AUC (0.539), and the cross-validation based on the multivariate regression analysis showed that the AUC of the model was 0.543. As shown in <xref ref-type="fig" rid="F7">Figure 7D</xref> and <xref ref-type="supplementary-material" rid="SM8">Supplementary Table 8</xref>, males who smoked more than 20 cigarettes/day were more likely to have abnormal sperm morphology than non-smokers (OR: 3.0, 95% CI: 1.76&#x02013;5.12, <italic>P</italic> &#x0003C; 0.001), but the trend did not appear for males who smoked &#x0003C;10 cigarettes/day (OR: 0.53, 95% CI: 0.39&#x02013;0.73, <italic>P</italic> &#x0003C; 0.001).</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>XGBoost and logistic regression analysis of the risk factors for sperm morphology. The ROC curve <bold>(A)</bold> and feature importance <bold>(B)</bold> analyzed by XGBoost and the ROC curve <bold>(C)</bold> and forest diagram showing significant risk factors <bold>(D)</bold> analyzed by logistic regression.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-09-811890-g0007.tif"/>
</fig>
</sec>
<sec>
<title>Risk factors affecting DFI</title>
<p>The AUC of the XGBoost model for DFI was 0.686 (<xref ref-type="fig" rid="F8">Figure 8A</xref>), and the cross-validation showed that the AUC of the model was 0.697. The top three important features affecting DFI were age, abstinence period, and smoking status (<xref ref-type="fig" rid="F8">Figure 8B</xref>). The AUCs of the logistic regression analyses (<xref ref-type="fig" rid="F8">Figure 8C</xref>) revealed that age, abstinence period, smoking status, and staying up late were predictive markers of sperm DFI. The AUC of the combined marker (AUC = 0.725) was higher than that of the other individual markers (AUC = 0.661, 0.598, 0.466, and 0.443). The cross-validation based on the multivariate regression analysis showed that the AUC of the model was 0.648. The top-two odds ratios appeared for age and abstinence period (OR = 5.47 and 3.61), indicating their important predictive roles. Smoking status, the third-most important factor revealed by XGBoost, was also shown to significantly affect sperm DFI in regression analysis (<xref ref-type="supplementary-material" rid="SM9">Supplementary Table 9</xref>).</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>XGBoost and logistic regression analysis of the risk factors for the DNA fragmentation index (DFI). The ROC curve <bold>(A)</bold> and feature importance <bold>(B)</bold> analyzed by XGBoost and the ROC curve <bold>(C)</bold> and forest diagram showing significant risk factors <bold>(D)</bold> analyzed by logistic regression.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmed-09-811890-g0008.tif"/>
</fig>
<p>Besides, as shown in <xref ref-type="fig" rid="F8">Figure 8D</xref> and <xref ref-type="supplementary-material" rid="SM9">Supplementary Table 9</xref>, older males (&#x0003E;35 years) and those maintaining abstinence for more than 7 days were more likely to have higher DFI (OR: 5.47, 95% CI: 3.41&#x02013;8.76, <italic>P</italic> &#x0003C; 0.001 and OR: 3.61, 95% CI: 2.27&#x02013;5.75, <italic>P</italic> &#x0003C; 0.001) than younger males (&#x0003C;30 years old) and those maintaining abstinence for &#x0003C;4 days, respectively. Males who smoked &#x0003C;10 cigarettes/day were less likely to have a high DFI (OR: 0.42, 95% CI: 0.27&#x02013;0.66, <italic>P</italic> &#x0003C; 0.001) than non-smokers. Nevertheless, when they smoked more than 20 cigarettes/day, the odds ratio of having a high DFI increased (<italic>P</italic> &#x0003E; 0.05).</p>
</sec>
<sec>
<title>Correlations between general information</title>
<p>Considering the close relationships among the variables, Spearman rank correlation analysis was performed. As shown in <xref ref-type="supplementary-material" rid="SM10">Supplementary Table 10</xref>, significant positive correlations were observed between sedentary lifestyle and age, staying up late, sleeplessness, consumption of pungent food, and exposure to radioactivity, with correlation coefficient (ICC) values of 0.043, 0.078, 0.056, 0.061, and 0.438, respectively. Meanwhile, sedentary lifestyles showed negative correlations with smoking status, intensity of sports activity, and working in hot conditions (ICC = &#x02212;0.088, &#x02212;0.119, and &#x02212;0.134, respectively). Positive correlations were observed between staying up late and smoking status, alcohol consumption, sleeplessness, consumption of pungent food, sedentary lifestyle, working in hot conditions, sauna use in the last 3 months, and exposure to radioactivity (ICC = 0.185, 0.238, 0.310, 0.342, 0.078, 0.087, 0.067, and 0.034; <italic>P</italic> &#x0003C; 0.05), but staying up late showed negative correlations with age, abstinence period, and intensity of sports activity (ICC = &#x02212;0.074, &#x02212;0.055, &#x02212;0.067; <italic>P</italic> &#x0003C; 0.05). Exposure to radioactivity showed positive correlations with staying up late, consumption of pungent food, and a sedentary lifestyle (ICC = 0.034, 0.046, and 0.438; <italic>P</italic> &#x0003C; 0.05), but showed negative correlations with smoking status, alcohol consumption, and working in hot conditions (ICC = &#x02212;0.142, &#x02212;0.028, and &#x02212;0.109).</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>The factors influencing semen quality are complex. Several studies have reported that male age and environmental/lifestyle exposures, rather than genetic problems, are primarily responsible for abnormal semen quality (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B22">22</xref>, <xref ref-type="bibr" rid="B23">23</xref>). Among these, lifestyle factors can be easily modified without medical interventions (<xref ref-type="bibr" rid="B24">24</xref>), and elucidating the lifestyle factors affecting semen quality can guide men to take appropriate measures in the preconception period. However, as described above, the lifestyles leading to abnormal semen quality have not been completely clarified, while the complexity of these data made related analysis difficult. In recent years, the wide application of AI has provided a new method for related research (<xref ref-type="bibr" rid="B13">13</xref>).</p>
<p>Since the typical tabular data in our research were more suitable for the decision tree algorithm, and XGBoost is generally superior to other decision tree algorithms such as GBDT, random forest, and artificial neural network models in terms of predictive performance (<xref ref-type="bibr" rid="B25">25</xref>&#x02013;<xref ref-type="bibr" rid="B27">27</xref>), we constructed a preliminary lifestyle- and general factor-based semen quality prediction model <italic>via</italic> machine learning with the XGBoost algorithm by using data collected from 5,109 healthy men. Furthermore, since the accuracy of machine learning algorithms may be impaired because of overfitting or insufficient data training (<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B28">28</xref>&#x02013;<xref ref-type="bibr" rid="B30">30</xref>), we have applied logistic regression combined with cross-validation to verify the accuracy and the feasibility of the machine learning-based prediction model.</p>
<p>After training the XGBoost with 13 potential affecting factors, the results showed that the AUCs of semen volume, sperm concentration, progressive and total sperm motility, and DFI were 0.648, 0.661, 0.697, 0.660, and 0.686, respectively, which was consistent with the regression model and the subsequent cross-validation. In addition, the top two important factors affecting semen volume, sperm concentration, and the top three important factors affecting sperm motility and DFI indicated by the XGBoost were also revealed as predictive indices by regression analysis, indicating the promising predictive value of machine learning. However, both the XGBoost model and logistic regression assay as well as the following cross-validation based on sperm morphology showed poor predictive values (AUC = 0.506, 0.520, 0.539, and 0.543). We speculate that this could be because lifestyle-related factors have minimal influence on sperm morphology (<xref ref-type="bibr" rid="B31">31</xref>), which is primarily mediated by genetic factors (<xref ref-type="bibr" rid="B32">32</xref>). The XGBoost prediction model indicated that smoking status was the most important factor affecting the parameters of semen volume, sperm concentration, and motility and was the third-most important factor affecting DFI, and the results were verified by regression analysis. Many other studies have also indicated that cigarette smoking has an overall negative effect on the semen parameters because the toxins originating from cigarette smoke can decrease sperm mitochondrial activity and damage the chromatin structure in human sperm (<xref ref-type="bibr" rid="B33">33</xref>&#x02013;<xref ref-type="bibr" rid="B36">36</xref>). The regression assay further revealed that heavy smoking (&#x0003E;20 cigarettes/day) posed a harmful effect, which suggested that men of reproductive age should give up heavy smoking first. 
However, it was interesting that mild (&#x0003C;10 cigarettes/day) smoking had positive consequences, which was partly consistent with the findings of Kemal and Adelusi et al. (<xref ref-type="bibr" rid="B37">37</xref>, <xref ref-type="bibr" rid="B38">38</xref>). They found that smokers showed a higher percentage of rapidly progressive sperm. The possible reason for this result is that mild smoking could generate trace amounts of oxides, which are required to support both sperm motility and capacitation (<xref ref-type="bibr" rid="B39">39</xref>). Moreover, our results inevitably showed interference since many patients who smoke very occasionally (&#x0003C;1 smoke/day) were categorized into the mild smoking group. Further adjustment and improvement of questionnaire designs will be performed in the following research.</p>
<p>Furthermore, the abstinence period was the second-most important factor influencing semen volume, progressive sperm motility, and DFI. The regression analysis further showed that longer abstinence periods (&#x0003E;7 days) can help increase semen volume, but would hurt sperm motility and increase sperm DFI. Sperm motility has been shown to peak within 4 or 5 days of abstinence (<xref ref-type="bibr" rid="B40">40</xref>), and spermatozoa accumulating in the epididymis might react with reactive oxygen and nitrogen species (ROS and RNS) during prolonged periods of ejaculatory abstinence (<xref ref-type="bibr" rid="B41">41</xref>). Thus, males should maintain a healthy rhythm of sex to ensure optimal semen quality.</p>
<p>Age is the primary risk factor affecting semen DFI and a secondary risk factor affecting semen density. The regression assay revealed that the sperm DFI was higher in older men, and oxidative stress damage might be one of the mechanisms underlying this finding (<xref ref-type="bibr" rid="B42">42</xref>&#x02013;<xref ref-type="bibr" rid="B44">44</xref>). Meanwhile, the semen volume decreased and sperm density increased with increasing age, which might be attributable to prostate atrophy. Increased age is known to be associated with genome-wide mutations, DFI, and chromatin integrity (<xref ref-type="bibr" rid="B45">45</xref>), and high sperm DFI is associated with spontaneous abortion (<xref ref-type="bibr" rid="B46">46</xref>, <xref ref-type="bibr" rid="B47">47</xref>). Thus, men should be encouraged to have children early.</p>
<p>In addition, other factors explored in this study, except sauna use in the last 3 months and sleeplessness, influenced semen parameters to some extent. Curiously, unlike published research stating that a sedentary lifestyle or playing computer games adversely affected sperm motility (<xref ref-type="bibr" rid="B48">48</xref>), our regression analysis revealed that individuals with predominantly sedentary lifestyles were less likely to have lower semen volume and those exposed to computer radiation constantly were less likely to have lower sperm motility. Moreover, men who slept late were less likely to have a high DFI. However, the correlation analysis (please see the <xref ref-type="supplementary-material" rid="SM10">Supplementary Table 10</xref>) revealed that sedentary lifestyles and prolonged computer usage showed negative correlations with smoking status and late sleeping hours showed a negative correlation with age, which may be one reason for the confusing results.</p>
<p>Our study had some limitations. First, all data were collected from our own center without external validation, and we recruited patients receiving assisted fertility guidance or treatment, which may not fully represent the general population. Second, most lifestyle factors were self-reported and were subjective constructs in this research. Moreover, the stages of changes in most lifestyle factors could not be precisely delineated, and the valid data sample was not large enough to obtain precise predictions. Under the influence of the various factors described above, the current results showed that the XGBoost algorithm had no obvious advantage over logistic regression. However, considering its benefits of allowing flexible analyses of relationships among predictor variables and outcomes in large datasets as well as the easy online updates in the prediction system, its implementation into the clinical workflow can be advantageous. We believe that the XGBoost will have promising predictive value and guiding significance after enlarging the data sample size and data feature dimensions as well as adding information-based data extraction methods.</p>
</sec>
<sec sec-type="conclusions" id="s5">
<title>Conclusion</title>
<p>In summary, the preliminary model for predicting semen quality using lifestyle factors that was developed with the XGBoost algorithm had the potential to undergo further optimization with larger training data. In addition, the model suggested that smoking status, abstinence period, and age were important factors affecting semen quality parameters.</p>
</sec>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s11">Supplementary material</xref>, further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s7">
<title>Ethics statement</title>
<p>This study was approved by the Ethics Committee of Ruijin Hospital, School of Medicine, Shanghai Jiao Tong University (No. 2019-185), and all participants recruited signed informed consent forms.</p>
</sec>
<sec id="s8">
<title>Author contributions</title>
<p>BX, AZ, and YG designed the study and headed the interdisciplinary exchange. TY, YG, and HH performed machine learning. JL and MZ undertook the statistical analyses. WF examined semen parameters. MZ, BX, TY, and AZ collected the data and drafted the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>This work was supported by grants from the Shanghai Jiao Tong University Medicine-Engineering Fund (Grant Number YG2017MS57), the National Natural Science Foundation of China (Grant Numbers 82071712 and 81771656), the Shanghai Medicine and Health Development Foundation (Grant Number SHWJRS(2021)-99 to BX), and the Guangci Clinical New Technology Sailing Plan of Ruijin Hospital (Grant Number GCQH-2021-07).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>Author TY is employed by Shanghai National Engineering Research Center of Digital Television Co., Ltd. This work is not funded by Shanghai National Engineering Research Center of Digital Television Co., Ltd. The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<sec sec-type="supplementary-material" id="s11">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmed.2022.811890/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmed.2022.811890/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_2.docx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_3.docx" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_4.docx" id="SM4" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_5.docx" id="SM5" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_6.docx" id="SM6" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_7.docx" id="SM7" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_8.docx" id="SM8" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_9.docx" id="SM9" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_10.docx" id="SM10" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Virtanen</surname> <given-names>HE</given-names></name> <name><surname>J&#x000F8;rgensen</surname> <given-names>N</given-names></name> <name><surname>Toppari</surname> <given-names>J</given-names></name></person-group>. <article-title>Semen quality in the 21st century</article-title>. <source>Nat Rev Urol.</source> (<year>2017</year>) <volume>14</volume>:<fpage>120</fpage>&#x02013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1038/nrurol.2016.261</pub-id><pub-id pub-id-type="pmid">28050014</pub-id></citation></ref>
<ref id="B2">
<label>2.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Agarwal</surname> <given-names>A</given-names></name> <name><surname>Baskaran</surname> <given-names>S</given-names></name> <name><surname>Parekh</surname> <given-names>N</given-names></name> <name><surname>Cho</surname> <given-names>C-L</given-names></name> <name><surname>Henkel</surname> <given-names>R</given-names></name> <name><surname>Vij</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Male infertility</article-title>. <source>Lancet.</source> (<year>2021</year>) <volume>397</volume>:<fpage>319</fpage>&#x02013;<lpage>33</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(20)32667-2</pub-id><pub-id pub-id-type="pmid">33308486</pub-id></citation></ref>
<ref id="B3">
<label>3.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>C</given-names></name> <name><surname>Li</surname> <given-names>B</given-names></name> <name><surname>Xu</surname> <given-names>K</given-names></name> <name><surname>Liu</surname> <given-names>D</given-names></name> <name><surname>Hu</surname> <given-names>J</given-names></name> <name><surname>Yang</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>Decline in semen quality among 30,636 young Chinese men from 2001 to 2015</article-title>. <source>Fertility Sterility</source>. (<year>2017</year>) <volume>107</volume>:<fpage>35</fpage>. <pub-id pub-id-type="doi">10.1016/j.fertnstert.2016.09.035</pub-id><pub-id pub-id-type="pmid">27793371</pub-id></citation></ref>
<ref id="B4">
<label>4.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>L</given-names></name> <name><surname>Zhang</surname> <given-names>L</given-names></name> <name><surname>Song</surname> <given-names>X-H</given-names></name> <name><surname>Zhang</surname> <given-names>H-B</given-names></name> <name><surname>Xu</surname> <given-names>C-Y</given-names></name> <name><surname>Chen</surname> <given-names>Z-J</given-names></name></person-group>. <article-title>Decline of semen quality among Chinese sperm bank donors within 7 years (2008-2014)</article-title>. <source>Asian J Androl.</source> (<year>2017</year>) <volume>19</volume>:<fpage>521</fpage>&#x02013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.4103/1008-682X.179533</pub-id><pub-id pub-id-type="pmid">27345004</pub-id></citation></ref>
<ref id="B5">
<label>5.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Verze</surname> <given-names>P</given-names></name> <name><surname>Cai</surname> <given-names>T</given-names></name> <name><surname>Lorenzetti</surname> <given-names>S</given-names></name></person-group>. <article-title>The role of the prostate in male fertility, health and disease</article-title>. <source>Nat Rev Urol.</source> (<year>2016</year>) <volume>13</volume>:<fpage>379</fpage>&#x02013;<lpage>86</lpage>. <pub-id pub-id-type="doi">10.1038/nrurol.2016.89</pub-id><pub-id pub-id-type="pmid">27245504</pub-id></citation></ref>
<ref id="B6">
<label>6.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Skakkebaek</surname> <given-names>NE</given-names></name> <name><surname>Rajpert-De Meyts</surname> <given-names>E</given-names></name> <name><surname>Buck Louis</surname> <given-names>GM</given-names></name> <name><surname>Toppari</surname> <given-names>J</given-names></name> <name><surname>Andersson</surname> <given-names>A-M</given-names></name> <name><surname>Eisenberg</surname> <given-names>ML</given-names></name> <etal/></person-group>. <article-title>Male reproductive disorders and fertility trends: influences of environment and genetic susceptibility</article-title>. <source>Physiol Rev.</source> (<year>2016</year>) <volume>96</volume>:<fpage>55</fpage>&#x02013;<lpage>97</lpage>. <pub-id pub-id-type="doi">10.1152/physrev.00017.2015</pub-id><pub-id pub-id-type="pmid">26582516</pub-id></citation></ref>
<ref id="B7">
<label>7.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krausz</surname> <given-names>C</given-names></name> <name><surname>Riera-Escamilla</surname> <given-names>A</given-names></name></person-group>. <article-title>Genetics of male infertility</article-title>. <source>Nat Rev Urol.</source> (<year>2018</year>) <volume>15</volume>:<fpage>369</fpage>&#x02013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1038/s41585-018-0003-3</pub-id><pub-id pub-id-type="pmid">29622783</pub-id></citation></ref>
<ref id="B8">
<label>8.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Punab</surname> <given-names>M</given-names></name> <name><surname>Poolamets</surname> <given-names>O</given-names></name> <name><surname>Paju</surname> <given-names>P</given-names></name> <name><surname>Vihljajev</surname> <given-names>V</given-names></name> <name><surname>Pomm</surname> <given-names>K</given-names></name> <name><surname>Ladva</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>Causes of male infertility: a 9-year prospective monocentre study on 1737 patients with reduced total sperm counts</article-title>. <source>Hum Reprod.</source> (<year>2017</year>) <volume>32</volume>:<fpage>18</fpage>&#x02013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1093/humrep/dew284</pub-id><pub-id pub-id-type="pmid">28817890</pub-id></citation></ref>
<ref id="B9">
<label>9.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>de Kretser</surname> <given-names>DM</given-names></name></person-group>. <article-title>Male infertility</article-title>. <source>Lancet.</source> (<year>1997</year>) <volume>349</volume>:<fpage>787</fpage>&#x02013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(96)08341-9</pub-id><pub-id pub-id-type="pmid">9074589</pub-id></citation></ref>
<ref id="B10">
<label>10.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hus&#x000E1;kov&#x000E1;</surname> <given-names>P</given-names></name> <name><surname>Ulcov&#x000E1;-Gallov&#x000E1;</surname> <given-names>Z</given-names></name> <name><surname>Bibkov&#x000E1;</surname> <given-names>K</given-names></name> <name><surname>Micanov&#x000E1;</surname> <given-names>Z</given-names></name></person-group>. <article-title>Semen quality of Pilsner University students</article-title>. <source>Cas Lek Cesk.</source> (<year>2008</year>) <volume>147</volume>:<fpage>85</fpage>&#x02013;<lpage>8</lpage>.<pub-id pub-id-type="pmid">18383958</pub-id></citation></ref>
<ref id="B11">
<label>11.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Giwercman</surname> <given-names>A</given-names></name> <name><surname>Giwercman</surname> <given-names>YL</given-names></name></person-group>. <article-title>Environmental factors and testicular function</article-title>. <source>Best Pract Res Clin Endocrinol Metab.</source> (<year>2011</year>) <volume>25</volume>:<fpage>391</fpage>&#x02013;<lpage>402</lpage>. <pub-id pub-id-type="doi">10.1016/j.beem.2010.09.011</pub-id><pub-id pub-id-type="pmid">21397206</pub-id></citation></ref>
<ref id="B12">
<label>12.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goldstein</surname> <given-names>BA</given-names></name> <name><surname>Navar</surname> <given-names>AM</given-names></name> <name><surname>Carter</surname> <given-names>RE</given-names></name></person-group>. <article-title>Moving beyond regression techniques in cardiovascular risk prediction: applying machine learning to address analytic challenges</article-title>. <source>Eur Heart J.</source> (<year>2017</year>) <volume>38</volume>:<fpage>1805</fpage>&#x02013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1093/eurheartj/ehw302</pub-id><pub-id pub-id-type="pmid">27436868</pub-id></citation></ref>
<ref id="B13">
<label>13.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>R</given-names></name> <name><surname>Pan</surname> <given-names>W</given-names></name> <name><surname>Jin</surname> <given-names>L</given-names></name> <name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Geng</surname> <given-names>Y</given-names></name> <name><surname>Gao</surname> <given-names>C</given-names></name> <etal/></person-group>. <article-title>Artificial intelligence in reproductive medicine</article-title>. <source>Reproduction.</source> (<year>2019</year>) <volume>158</volume>:<fpage>R139</fpage>&#x02013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1530/REP-18-0523</pub-id><pub-id pub-id-type="pmid">30970326</pub-id></citation></ref>
<ref id="B14">
<label>14.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Topol</surname> <given-names>EJ</given-names></name></person-group>. <article-title>High-performance medicine: the convergence of human and artificial intelligence</article-title>. <source>Nat Med.</source> (<year>2019</year>) <volume>25</volume>:<fpage>44</fpage>&#x02013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-018-0300-7</pub-id><pub-id pub-id-type="pmid">30617339</pub-id></citation></ref>
<ref id="B15">
<label>15.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hamet</surname> <given-names>P</given-names></name> <name><surname>Tremblay</surname> <given-names>J</given-names></name></person-group>. <article-title>Artificial intelligence in medicine</article-title>. <source>Metabolism</source>. (<year>2017</year>) <volume>69S</volume>:<fpage>S36</fpage>&#x02013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1016/j.metabol.2017.01.011</pub-id><pub-id pub-id-type="pmid">28126242</pub-id></citation></ref>
<ref id="B16">
<label>16.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Riegler</surname> <given-names>MA</given-names></name> <name><surname>Stensen</surname> <given-names>MH</given-names></name> <name><surname>Witczak</surname> <given-names>O</given-names></name> <name><surname>Andersen</surname> <given-names>JM</given-names></name> <name><surname>Hicks</surname> <given-names>SA</given-names></name> <name><surname>Hammer</surname> <given-names>HL</given-names></name> <etal/></person-group>. <article-title>Artificial intelligence in the fertility clinic: status, pitfalls and possibilities</article-title>. <source>Hum Reprod.</source> (<year>2021</year>) <volume>36</volume>:<fpage>2429</fpage>&#x02013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1093/humrep/deab168</pub-id><pub-id pub-id-type="pmid">34324672</pub-id></citation></ref>
<ref id="B17">
<label>17.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Girela</surname> <given-names>JL</given-names></name> <name><surname>Gil</surname> <given-names>D</given-names></name> <name><surname>Johnsson</surname> <given-names>M</given-names></name> <name><surname>Gomez-Torres</surname> <given-names>MJ</given-names></name> <name><surname>De Juan</surname> <given-names>J</given-names></name></person-group>. <article-title>Semen parameters can be predicted from environmental factors and lifestyle using artificial intelligence methods</article-title>. <source>Biol Reprod.</source> (<year>2013</year>) <volume>88</volume>:<fpage>99</fpage>. <pub-id pub-id-type="doi">10.1095/biolreprod.112.104653</pub-id><pub-id pub-id-type="pmid">23446456</pub-id></citation></ref>
<ref id="B18">
<label>18.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sahoo</surname> <given-names>AJ</given-names></name> <name><surname>Kumar</surname> <given-names>Y</given-names></name></person-group>. <article-title>Seminal quality prediction using data mining methods</article-title>. <source>Technol Health Care.</source> (<year>2014</year>) <volume>22</volume>:<fpage>531</fpage>&#x02013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.3233/THC-140816</pub-id><pub-id pub-id-type="pmid">24898862</pub-id></citation></ref>
<ref id="B19">
<label>19.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lagerros</surname> <given-names>YT</given-names></name> <name><surname>Mucci</surname> <given-names>LA</given-names></name> <name><surname>Bellocco</surname> <given-names>R</given-names></name> <name><surname>Nyr&#x000E9;n</surname> <given-names>O</given-names></name> <name><surname>B&#x000E4;lter</surname> <given-names>O</given-names></name> <name><surname>B&#x000E4;lter</surname> <given-names>KA</given-names></name></person-group>. <article-title>Validity and reliability of self-reported total energy expenditure using a novel instrument</article-title>. <source>Eur J Epidemiol.</source> (<year>2006</year>) <volume>21</volume>:<fpage>227</fpage>&#x02013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1007/s10654-006-0013-y</pub-id><pub-id pub-id-type="pmid">16547838</pub-id></citation></ref>
<ref id="B20">
<label>20.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cooper</surname> <given-names>TG</given-names></name> <name><surname>Noonan</surname> <given-names>E</given-names></name> <name><surname>von Eckardstein</surname> <given-names>S</given-names></name> <name><surname>Auger</surname> <given-names>J</given-names></name> <name><surname>Baker</surname> <given-names>HWG</given-names></name> <name><surname>Behre</surname> <given-names>HM</given-names></name> <etal/></person-group>. <article-title>World Health Organization reference values for human semen characteristics</article-title>. <source>Hum Reprod Update.</source> (<year>2010</year>) <volume>16</volume>:<fpage>231</fpage>&#x02013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1093/humupd/dmp048</pub-id><pub-id pub-id-type="pmid">19934213</pub-id></citation></ref>
<ref id="B21">
<label>21.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Le</surname> <given-names>MT</given-names></name> <name><surname>Nguyen</surname> <given-names>TAT</given-names></name> <name><surname>Nguyen</surname> <given-names>HTT</given-names></name> <name><surname>Nguyen</surname> <given-names>TTT</given-names></name> <name><surname>Nguyen</surname> <given-names>VT</given-names></name> <name><surname>Le</surname> <given-names>DD</given-names></name> <etal/></person-group>. <article-title>Does sperm DNA fragmentation correlate with semen parameters?</article-title> <source>Reprod Med Biol.</source> (<year>2019</year>) <volume>18</volume>:<fpage>390</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1002/rmb2.12297</pub-id><pub-id pub-id-type="pmid">31607800</pub-id></citation></ref>
<ref id="B22">
<label>22.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tournaye</surname> <given-names>H</given-names></name> <name><surname>Krausz</surname> <given-names>C</given-names></name> <name><surname>Oates</surname> <given-names>RD</given-names></name></person-group>. <article-title>Concepts in diagnosis and therapy for male reproductive impairment</article-title>. <source>Lancet Diabetes Endocrinol.</source> (<year>2017</year>) <volume>5</volume>:<fpage>554</fpage>&#x02013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1016/S2213-8587(16)30043-2</pub-id><pub-id pub-id-type="pmid">27395770</pub-id></citation></ref>
<ref id="B23">
<label>23.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ilacqua</surname> <given-names>A</given-names></name> <name><surname>Izzo</surname> <given-names>G</given-names></name> <name><surname>Emerenziani</surname> <given-names>GP</given-names></name> <name><surname>Baldari</surname> <given-names>C</given-names></name> <name><surname>Aversa</surname> <given-names>A</given-names></name></person-group>. <article-title>Lifestyle and fertility: the influence of stress and quality of life on male fertility</article-title>. <source>Reprod Biol Endocrinol.</source> (<year>2018</year>) <volume>16</volume>:<fpage>115</fpage>. <pub-id pub-id-type="doi">10.1186/s12958-018-0436-9</pub-id><pub-id pub-id-type="pmid">30474562</pub-id></citation></ref>
<ref id="B24">
<label>24.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mulder</surname> <given-names>M</given-names></name> <name><surname>Ranchor</surname> <given-names>AV</given-names></name> <name><surname>Sanderman</surname> <given-names>R</given-names></name> <name><surname>Bouma</surname> <given-names>J</given-names></name> <name><surname>van den Heuvel</surname> <given-names>WJ</given-names></name></person-group>. <article-title>The stability of lifestyle behaviour</article-title>. <source>Int J Epidemiol.</source> (<year>1998</year>) <volume>27</volume>:<fpage>199</fpage>&#x02013;<lpage>207</lpage>. <pub-id pub-id-type="doi">10.1093/ije/27.2.199</pub-id><pub-id pub-id-type="pmid">9602399</pub-id></citation></ref>
<ref id="B25">
<label>25.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yue</surname> <given-names>S</given-names></name> <name><surname>Li</surname> <given-names>S</given-names></name> <name><surname>Huang</surname> <given-names>X</given-names></name> <name><surname>Liu</surname> <given-names>J</given-names></name> <name><surname>Hou</surname> <given-names>X</given-names></name> <name><surname>Zhao</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>Machine learning for the prediction of acute kidney injury in patients with sepsis</article-title>. <source>J Transl Med.</source> (<year>2022</year>) <volume>20</volume>:<fpage>215</fpage>. <pub-id pub-id-type="doi">10.1186/s12967-022-03364-0</pub-id><pub-id pub-id-type="pmid">35783603</pub-id></citation></ref>
<ref id="B26">
<label>26.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>J</given-names></name> <name><surname>Wang</surname> <given-names>G</given-names></name> <name><surname>Yao</surname> <given-names>R</given-names></name> <name><surname>Ren</surname> <given-names>C</given-names></name> <name><surname>Chen</surname> <given-names>G</given-names></name> <etal/></person-group>. <article-title>Machine learning prediction models for mechanically ventilated patients: analyses of the MIMIC-III database</article-title>. <source>Front Med.</source> (<year>2021</year>) <volume>8</volume>:<fpage>662340</fpage>. <pub-id pub-id-type="doi">10.3389/fmed.2021.662340</pub-id><pub-id pub-id-type="pmid">34277655</pub-id></citation></ref>
<ref id="B27">
<label>27.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>H-C</given-names></name> <name><surname>Yoon</surname> <given-names>SB</given-names></name> <name><surname>Yang</surname> <given-names>S-M</given-names></name> <name><surname>Kim</surname> <given-names>WH</given-names></name> <name><surname>Ryu</surname> <given-names>H-G</given-names></name> <name><surname>Jung</surname> <given-names>C-W</given-names></name> <etal/></person-group>. <article-title>Prediction of acute kidney injury after liver transplantation: machine learning approaches vs. logistic regression model</article-title>. <source>J Clin Med</source>. (<year>2018</year>) <volume>7</volume>:<fpage>428</fpage>. <pub-id pub-id-type="doi">10.3390/jcm7110428</pub-id><pub-id pub-id-type="pmid">30413107</pub-id></citation></ref>
<ref id="B28">
<label>28.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rajkomar</surname> <given-names>A</given-names></name> <name><surname>Dean</surname> <given-names>J</given-names></name> <name><surname>Kohane</surname> <given-names>I</given-names></name></person-group>. <article-title>Machine learning in medicine</article-title>. <source>N Engl J Med.</source> (<year>2019</year>) <volume>380</volume>:<fpage>1347</fpage>&#x02013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMra1814259</pub-id><pub-id pub-id-type="pmid">30943338</pub-id></citation></ref>
<ref id="B29">
<label>29.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Davenport</surname> <given-names>T</given-names></name> <name><surname>Kalakota</surname> <given-names>R</given-names></name></person-group>. <article-title>The potential for artificial intelligence in healthcare</article-title>. <source>Future Healthc J.</source> (<year>2019</year>) <volume>6</volume>:<fpage>94</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.7861/futurehosp.6-2-94</pub-id><pub-id pub-id-type="pmid">31363513</pub-id></citation></ref>
<ref id="B30">
<label>30.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Collins</surname> <given-names>GS</given-names></name> <name><surname>Moons</surname> <given-names>KGM</given-names></name></person-group>. <article-title>Reporting of artificial intelligence prediction models</article-title>. <source>Lancet.</source> (<year>2019</year>) <volume>393</volume>:<fpage>1577</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(19)30037-6</pub-id><pub-id pub-id-type="pmid">31007185</pub-id></citation></ref>
<ref id="B31">
<label>31.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pacey</surname> <given-names>AA</given-names></name> <name><surname>Povey</surname> <given-names>AC</given-names></name> <name><surname>Clyma</surname> <given-names>JA</given-names></name> <name><surname>McNamee</surname> <given-names>R</given-names></name> <name><surname>Moore</surname> <given-names>HD</given-names></name> <name><surname>Baillie</surname> <given-names>H</given-names></name> <etal/></person-group>. <article-title>Modifiable and non-modifiable risk factors for poor sperm morphology</article-title>. <source>Hum Reprod.</source> (<year>2014</year>) <volume>29</volume>:<fpage>1629</fpage>&#x02013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1093/humrep/deu116</pub-id><pub-id pub-id-type="pmid">25890552</pub-id></citation></ref>
<ref id="B32">
<label>32.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ray</surname> <given-names>PF</given-names></name> <name><surname>Toure</surname> <given-names>A</given-names></name> <name><surname>Metzler-Guillemain</surname> <given-names>C</given-names></name> <name><surname>Mitchell</surname> <given-names>MJ</given-names></name> <name><surname>Arnoult</surname> <given-names>C</given-names></name> <name><surname>Coutton</surname> <given-names>C</given-names></name></person-group>. <article-title>Genetic abnormalities leading to qualitative defects of sperm morphology or function</article-title>. <source>Clin Genet.</source> (<year>2017</year>) <volume>91</volume>:<fpage>217</fpage>&#x02013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1111/cge.12905</pub-id><pub-id pub-id-type="pmid">27779748</pub-id></citation></ref>
<ref id="B33">
<label>33.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mostafa</surname> <given-names>RM</given-names></name> <name><surname>Nasrallah</surname> <given-names>YS</given-names></name> <name><surname>Hassan</surname> <given-names>MM</given-names></name> <name><surname>Farrag</surname> <given-names>AF</given-names></name> <name><surname>Majzoub</surname> <given-names>A</given-names></name> <name><surname>Agarwal</surname> <given-names>A</given-names></name></person-group>. <article-title>The effect of cigarette smoking on human seminal parameters, sperm chromatin structure and condensation</article-title>. <source>Andrologia</source>. (<year>2018</year>) <volume>50</volume>:<fpage>e12910</fpage>. <pub-id pub-id-type="doi">10.1111/and.12910</pub-id><pub-id pub-id-type="pmid">29124782</pub-id></citation></ref>
<ref id="B34">
<label>34.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mohamad Al-Ali</surname> <given-names>B</given-names></name> <name><surname>Eredics</surname> <given-names>K</given-names></name></person-group>. <article-title>Synergistic effects of cigarette smoking and varicocele on semen parameters in 715 patients</article-title>. <source>Wien Klin Wochenschr.</source> (<year>2017</year>) <volume>129</volume>:<fpage>482</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1007/s00508-017-1199-6</pub-id><pub-id pub-id-type="pmid">28439698</pub-id></citation></ref>
<ref id="B35">
<label>35.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Keskin</surname> <given-names>MZ</given-names></name> <name><surname>Budak</surname> <given-names>S</given-names></name> <name><surname>Gubari</surname> <given-names>S</given-names></name> <name><surname>Durmaz</surname> <given-names>K</given-names></name> <name><surname>Yoldas</surname> <given-names>M</given-names></name> <name><surname>Celik</surname> <given-names>O</given-names></name> <etal/></person-group>. <article-title>Do cigarette and alcohol affect semen analysis?</article-title> <source>Arch Ital Urol Androl.</source> (<year>2016</year>) <volume>88</volume>:<fpage>56</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.4081/aiua.2016.1.56</pub-id><pub-id pub-id-type="pmid">27072177</pub-id></citation></ref>
<ref id="B36">
<label>36.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sharma</surname> <given-names>R</given-names></name> <name><surname>Harlev</surname> <given-names>A</given-names></name> <name><surname>Agarwal</surname> <given-names>A</given-names></name> <name><surname>Esteves</surname> <given-names>SC</given-names></name></person-group>. <article-title>Cigarette smoking and semen quality: a new meta-analysis examining the effect of the 2010 World Health Organization Laboratory Methods for the Examination of Human Semen</article-title>. <source>Eur Urol.</source> (<year>2016</year>) <volume>70</volume>:<fpage>635</fpage>&#x02013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1016/j.eururo.2016.04.010</pub-id><pub-id pub-id-type="pmid">27317089</pub-id></citation></ref>
<ref id="B37">
<label>37.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ozgur</surname> <given-names>K</given-names></name> <name><surname>Isikoglu</surname> <given-names>M</given-names></name> <name><surname>Seleker</surname> <given-names>M</given-names></name> <name><surname>Donmez</surname> <given-names>L</given-names></name></person-group>. <article-title>Semen quality of smoking and non-smoking men in infertile couples in a Turkish population</article-title>. <source>Arch Gynecol Obstet.</source> (<year>2005</year>) <volume>271</volume>:<fpage>109</fpage>&#x02013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1007/s00404-003-0572-z</pub-id><pub-id pub-id-type="pmid">14685893</pub-id></citation></ref>
<ref id="B38">
<label>38.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Adelusi</surname> <given-names>B</given-names></name> <name><surname>al-Twaijiri</surname> <given-names>MH</given-names></name> <name><surname>al-Meshari</surname> <given-names>A</given-names></name> <name><surname>Kangave</surname> <given-names>D</given-names></name> <name><surname>al-Nuaim</surname> <given-names>LA</given-names></name> <name><surname>Younnus</surname> <given-names>B</given-names></name></person-group>. <article-title>Correlation of smoking and coffee drinking with sperm progressive motility in infertile males</article-title>. <source>Afr J Med Med Sci.</source> (<year>1998</year>) <volume>27</volume>:<fpage>47</fpage>&#x02013;<lpage>50</lpage>.<pub-id pub-id-type="pmid">10456129</pub-id></citation></ref>
<ref id="B39">
<label>39.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>O&#x00027;Flaherty</surname> <given-names>CM</given-names></name> <name><surname>Beorlegui</surname> <given-names>NB</given-names></name> <name><surname>Beconi</surname> <given-names>MT</given-names></name></person-group>. <article-title>Lactate dehydrogenase-C4 is involved in heparin- and NADH-dependent bovine sperm capacitation</article-title>. <source>Andrologia.</source> (<year>2002</year>) <volume>34</volume>:<fpage>91</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1046/j.0303-4569.2001.00481.x</pub-id><pub-id pub-id-type="pmid">11966575</pub-id></citation></ref>
<ref id="B40">
<label>40.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hanson</surname> <given-names>BM</given-names></name> <name><surname>Aston</surname> <given-names>KI</given-names></name> <name><surname>Jenkins</surname> <given-names>TG</given-names></name> <name><surname>Carrell</surname> <given-names>DT</given-names></name> <name><surname>Hotaling</surname> <given-names>JM</given-names></name></person-group>. <article-title>The impact of ejaculatory abstinence on semen analysis parameters: a systematic review</article-title>. <source>J Assist Reprod Genet.</source> (<year>2018</year>) <volume>35</volume>:<fpage>213</fpage>&#x02013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1007/s10815-017-1086-0</pub-id><pub-id pub-id-type="pmid">29143943</pub-id></citation></ref>
<ref id="B41">
<label>41.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Borges</surname> <given-names>E</given-names></name> <name><surname>Braga</surname> <given-names>DPAF</given-names></name> <name><surname>Zanetti</surname> <given-names>BF</given-names></name> <name><surname>Iaconelli</surname> <given-names>A</given-names></name> <name><surname>Setti</surname> <given-names>AS</given-names></name></person-group>. <article-title>Revisiting the impact of ejaculatory abstinence on semen quality and intracytoplasmic sperm injection outcomes</article-title>. <source>Andrology.</source> (<year>2019</year>) <volume>7</volume>:<fpage>213</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1111/andr.12572</pub-id><pub-id pub-id-type="pmid">30570220</pub-id></citation></ref>
<ref id="B42">
<label>42.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>SL</given-names></name> <name><surname>Dunleavy</surname> <given-names>J</given-names></name> <name><surname>Gemmell</surname> <given-names>NJ</given-names></name> <name><surname>Nakagawa</surname> <given-names>S</given-names></name></person-group>. <article-title>Consistent age-dependent declines in human semen quality: a systematic review and meta-analysis</article-title>. <source>Ageing Res Rev.</source> (<year>2015</year>) <volume>19</volume>:<fpage>22</fpage>&#x02013;<lpage>33</lpage>. <pub-id pub-id-type="doi">10.1016/j.arr.2014.10.007</pub-id><pub-id pub-id-type="pmid">25462195</pub-id></citation></ref>
<ref id="B43">
<label>43.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Eskenazi</surname> <given-names>B</given-names></name> <name><surname>Wyrobek</surname> <given-names>AJ</given-names></name> <name><surname>Sloter</surname> <given-names>E</given-names></name> <name><surname>Kidd</surname> <given-names>SA</given-names></name> <name><surname>Moore</surname> <given-names>L</given-names></name> <name><surname>Young</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>The association of age and semen quality in healthy men</article-title>. <source>Hum Reprod.</source> (<year>2003</year>) <volume>18</volume>:<fpage>447</fpage>&#x02013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1093/humrep/deg107</pub-id><pub-id pub-id-type="pmid">12571189</pub-id></citation></ref>
<ref id="B44">
<label>44.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brahem</surname> <given-names>S</given-names></name> <name><surname>Mehdi</surname> <given-names>M</given-names></name> <name><surname>Elghezal</surname> <given-names>H</given-names></name> <name><surname>Saad</surname> <given-names>A</given-names></name></person-group>. <article-title>The effects of male aging on semen quality, sperm DNA fragmentation and chromosomal abnormalities in an infertile population</article-title>. <source>J Assist Reprod Genet.</source> (<year>2011</year>) <volume>28</volume>:<fpage>425</fpage>&#x02013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1007/s10815-011-9537-5</pub-id><pub-id pub-id-type="pmid">21287403</pub-id></citation></ref>
<ref id="B45">
<label>45.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kong</surname> <given-names>A</given-names></name> <name><surname>Frigge</surname> <given-names>ML</given-names></name> <name><surname>Masson</surname> <given-names>G</given-names></name> <name><surname>Besenbacher</surname> <given-names>S</given-names></name> <name><surname>Sulem</surname> <given-names>P</given-names></name> <name><surname>Magnusson</surname> <given-names>G</given-names></name> <etal/></person-group>. <article-title>Rate of de novo mutations and the importance of father&#x00027;s age to disease risk</article-title>. <source>Nature.</source> (<year>2012</year>) <volume>488</volume>:<fpage>471</fpage>&#x02013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.1038/nature11396</pub-id><pub-id pub-id-type="pmid">23312194</pub-id></citation></ref>
<ref id="B46">
<label>46.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>M-H</given-names></name> <name><surname>Kuo-Kuang Lee</surname> <given-names>R</given-names></name> <name><surname>Li</surname> <given-names>S-H</given-names></name> <name><surname>Lu</surname> <given-names>C-H</given-names></name> <name><surname>Sun</surname> <given-names>F-J</given-names></name> <name><surname>Hwu</surname> <given-names>Y-M</given-names></name></person-group>. <article-title>Sperm chromatin structure assay parameters are not related to fertilization rates, embryo quality, and pregnancy rates in <italic>in vitro</italic> fertilization and intracytoplasmic sperm injection, but might be related to spontaneous abortion rates</article-title>. <source>Fertil Steril.</source> (<year>2008</year>) <volume>90</volume>:<fpage>352</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/j.fertnstert.2007.06.018</pub-id><pub-id pub-id-type="pmid">17904130</pub-id></citation></ref>
<ref id="B47">
<label>47.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McQueen</surname> <given-names>DB</given-names></name> <name><surname>Zhang</surname> <given-names>J</given-names></name> <name><surname>Robins</surname> <given-names>JC</given-names></name></person-group>. <article-title>Sperm DNA fragmentation and recurrent pregnancy loss: a systematic review and meta-analysis</article-title>. <source>Fertil Steril.</source> (<year>2019</year>) <volume>112</volume>:<fpage>54</fpage>&#x02013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1016/j.fertnstert.2019.03.003</pub-id><pub-id pub-id-type="pmid">31909664</pub-id></citation></ref>
<ref id="B48">
<label>48.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gaskins</surname> <given-names>AJ</given-names></name> <name><surname>Mendiola</surname> <given-names>J</given-names></name> <name><surname>Afeiche</surname> <given-names>M</given-names></name> <name><surname>J&#x000F8;rgensen</surname> <given-names>N</given-names></name> <name><surname>Swan</surname> <given-names>SH</given-names></name> <name><surname>Chavarro</surname> <given-names>JE</given-names></name></person-group>. <article-title>Physical activity and television watching in relation to semen quality in young men</article-title>. <source>Br J Sports Med.</source> (<year>2015</year>) <volume>49</volume>:<fpage>265</fpage>&#x02013;<lpage>70</lpage>. <pub-id pub-id-type="doi">10.1136/bjsports-2012-091644</pub-id><pub-id pub-id-type="pmid">23380634</pub-id></citation></ref>
</ref-list> 
</back>
</article>