<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1413484</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2024.1413484</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>ScnML models single-cell transcriptome to predict spinal cord neuronal cell status</article-title>
<alt-title alt-title-type="left-running-head">Liu et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2024.1413484">10.3389/fgene.2024.1413484</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Liu</surname>
<given-names>Lijia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Huang</surname>
<given-names>Yuxuan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2709591/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Zheng</surname>
<given-names>Yuan</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liao</surname>
<given-names>Yihan</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Ma</surname>
<given-names>Siyuan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Qian</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2709122/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>School of Recreation and Community Sport</institution>, <institution>Capital University of Physical Education and Sports</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Neuroscience in the Behavioral Sciences</institution>, <institution>Duke University and Duke Kunshan University</institution>, Suzhou, <addr-line>Jiangsu</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Taizhou Hospital of Zhejiang Province</institution>, <institution>Wenzhou Medical University</institution>, <addr-line>Luqiao</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Neurology</institution>, <institution>The First Hospital of Tsinghua University</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1539905/overview">Zhuang Xiong</ext-link>, Fuzhou University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/625844/overview">Xiangzheng Fu</ext-link>, Hunan University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2633486/overview">Xu Chi</ext-link>, Chinese Academy of Sciences (CAS), China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Qian Wang, <email>wangqian32@hotmail.com</email>; Siyuan Ma, <email>masiyuan@cupes.edu.cn</email>
</corresp>
<fn fn-type="equal" id="fn001">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>04</day>
<month>06</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1413484</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>04</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>05</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Liu, Huang, Zheng, Liao, Ma and Wang.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Liu, Huang, Zheng, Liao, Ma and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Injuries to the spinal cord nervous system often result in permanent loss of sensory, motor, and autonomic functions. Accurately identifying the cellular state of spinal cord nerves is extremely important and could facilitate the development of new therapeutic and rehabilitative strategies. Existing experimental techniques for identifying the development of spinal cord nerves are both labor-intensive and costly. In this study, we developed a machine learning predictor, ScnML, for predicting subpopulations of spinal cord nerve cells as well as identifying marker genes. The prediction performance of ScnML was evaluated on the training dataset with an accuracy of 94.33%. Based on XGBoost, ScnML achieved accuracy, precision, recall, and F1-measure scores of 94.08%, 94.24%, 94.26%, and 94.24%, respectively, on the test dataset. Importantly, ScnML identified new significant genes through model interpretation and biological landscape analysis. ScnML can be a powerful tool for predicting the status of spinal cord neuronal cells, revealing potential specific biomarkers quickly and efficiently, and providing crucial insights for precision medicine and rehabilitation recovery.</p>
</abstract>
<kwd-group>
<kwd>machine learning</kwd>
<kwd>spinal cord nervous</kwd>
<kwd>ScRNA-seq</kwd>
<kwd>marker genes</kwd>
<kwd>cell subpopulations</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Statistical Genetics and Methodology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>The spinal cord nerves are the primary regulators of a wide range of motor behaviors in animals, which cover a range of fine motor actions from basic fight or flight responses to complex social interactions (<xref ref-type="bibr" rid="B15">Liau et al., 2023a</xref>). When the spinal nerves are abnormal, the patient quickly enters a phase known as &#x201c;spinal shock,&#x201d; which can lead to permanent loss of motor, sensory, and autonomic functions (<xref ref-type="bibr" rid="B13">Li et al., 2022</xref>). Spinal cord injury (SCI) is a traumatic neurological disorder, especially lower thoracic and cervical spine lesions causing paraplegia and quadriplegia (<xref ref-type="bibr" rid="B3">Alizadeh et al., 2019</xref>). A detailed understanding of spinal cord nerves provides important implications for the future development of more precise clinical treatments or guided exercise training to promote functional recovery after SCI, as well as for the conduct of pathophysiologic research (<xref ref-type="bibr" rid="B9">Fu et al., 2016</xref>; <xref ref-type="bibr" rid="B3">Alizadeh et al., 2019</xref>).</p>
<p>With the development of single-cell sequencing technique (<xref ref-type="bibr" rid="B27">Xiong et al., 2020</xref>; <xref ref-type="bibr" rid="B28">Xiong et al., 2022</xref>), we can explore the cellular composition of spinal nerves at high resolution. For example, <xref ref-type="bibr" rid="B16">Liau et al. (2023b)</xref> used scRNA sequencing to resolve the heterogeneity of mouse spinal motor neurons and discovered a diverse code of neuropeptide to characterize putative motor pool identities. Based on single-cell RNA sequencing (scRNA-seq) technique, <xref ref-type="bibr" rid="B25">Wang T. et al. (2023)</xref> resolved the cellular heterogeneity of orthopedic diseases, including spinal cord injury (SCI), related to their development, as well as their functions and potential molecular mechanisms. <xref ref-type="bibr" rid="B4">Cao et al. (2022)</xref> utilized single-cell RNA sequencing (scRNA-seq) to comprehensively depict the cellular diversity of the spinal cord, deeply reveal the dynamic changes of cells and molecules in the microenvironment, and elucidate the intercellular communication between the normal and injured states of the spinal cord, which provides a powerful tool for the study of the molecular mechanisms of traumatic spinal cord injury. <xref ref-type="bibr" rid="B7">Delile et al. (2019)</xref> used single-cell mRNA sequencing to resolve developmental maps of the cervical and thoracic regions of the neural tube in mice on embryonic days 9.5&#x2013;13.5, revealing mechanisms of neuronal specification and providing direct insights into spinal cord cell classification.</p>
<p>Although previous research techniques are quite mature, mining marker genes and identifying cell subpopulations using manual methods remains time-consuming and laborious. Therefore, there is an urgent need to develop a computational method to assist researchers in efficiently identifying cellular subpopulations and deeply exploring their potential marker genes.</p>
<p>To overcome these challenges, we introduced a computational framework, called ScnML, designed to identify biomarkers of spinal cord neuronal cell subpopulations and to predict cellular developmental stages. The framework is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. In order to obtain the optimal predictive model, we used a strategy that combines feature selection and incremental feature selection (IFS) (<xref ref-type="bibr" rid="B21">Wang et al., 2021a</xref>) in four basic classification methods: K-Nearest Neighbors (KNN), extreme Gradient Boosting (XGBoost) (<xref ref-type="bibr" rid="B5">Chen and Guestrin, 2016</xref>), Support Vector Machine (SVM) (<xref ref-type="bibr" rid="B6">Cortes and Vapnik, 1995</xref>; <xref ref-type="bibr" rid="B30">Zhang et al., 2024</xref>), and Random Forest (RF) (<xref ref-type="bibr" rid="B1">Al-Allak et al., 2013</xref>). Given the importance of interpretability and robustness, we chose the XGBoost algorithm to build the computational model. We validated the model using a test set and achieved an accuracy of 94.08%. By performing biological analysis of the optimal genes, we identified potential marker genes that may assist biologists in gaining a deeper insight into the diversity present within spinal cord neuronal cells.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The workflow of constructing ScnML.</p>
</caption>
<graphic xlink:href="fgene-15-1413484-g001.tif"/>
</fig>
<sec id="s1-1">
<title>Identification of significant genes by ScnML</title>
<p>To identify significant genes associated with spinal cord neuronal cell subpopulations, we used three feature selection methods (Mutual Information Coefficient: MIC, Coefficient of Variation Squared: CV2, and Principal Component Analysis: PCA) to assess the significance of 27,998 genes and ranked them according to their contribution values. Genes with importance scores less than or equal to zero were excluded. Next, the machine learning models were combined with IFS to determine the optimal subset of genes (<xref ref-type="fig" rid="F2">Figures 2A&#x2013;C</xref>). Machine learning models (SVM, RFC, XGBoost, and KNN) were trained using single-cell gene expression matrices (normalized raw read counts) as input features, based on five-fold cross-validation.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>The results of feature selection. <bold>(A&#x2013;C)</bold> Show the incremental feature selection (IFS) curves illustrating the prediction performance of the three feature selection methods (CV2, MIC and PCA) with four different classifiers for different gene subsets. <bold>(D)</bold> Comparative Venn diagram of the top 100 genes in MIC, CV2 and PCA.</p>
</caption>
<graphic xlink:href="fgene-15-1413484-g002.tif"/>
</fig>
<p>The results from the training dataset showed that the combination of MIC with XGBoost (ScnML) achieved the optimal prediction performance by using the first 210 genes, with an accuracy of 94.33% (<xref ref-type="table" rid="T1">Table 1</xref>). Based on the 210 best genes, ScnML also achieved the best performance on the test dataset, with accuracy, precision, recall, and F1-measure of 94.08%, 94.24%, 94.26%, and 94.24%, respectively (<xref ref-type="table" rid="T2">Table 2</xref>). It is notable that the four machine learning models, when combined with PCA, also yielded superior predictive performance. To avoid feature selection methods having the same scoring preferences, we compared the top 100 genes scored by the three feature selection methods. As observed from <xref ref-type="fig" rid="F2">Figure 2D</xref>, there is almost no intersection among the top 100 genes selected by MIC, CV2, and PCA, demonstrating the effectiveness of each feature selection method.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Performance evaluation of different feature selection combined with machine learning schemes (Train dataset).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="center">Feature selection</th>
<th align="center">No. of features</th>
<th align="center">Accuracy (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">KNN</td>
<td align="center">PCA</td>
<td align="center">360</td>
<td align="center">47.15</td>
</tr>
<tr>
<td align="left">XGBoost</td>
<td align="center">PCA</td>
<td align="center">10,000</td>
<td align="center">94.11</td>
</tr>
<tr>
<td align="left">SVM</td>
<td align="center">PCA</td>
<td align="center">14,000</td>
<td align="center">88.14</td>
</tr>
<tr>
<td align="left">RFC</td>
<td align="center">PCA</td>
<td align="center">4,800</td>
<td align="center">87.79</td>
</tr>
<tr>
<td align="left">KNN</td>
<td align="center">CV2</td>
<td align="center">760</td>
<td align="center">31.72</td>
</tr>
<tr>
<td align="left">XGBoost</td>
<td align="center">CV2</td>
<td align="center">10,000</td>
<td align="center">92.29</td>
</tr>
<tr>
<td align="left">SVM</td>
<td align="center">CV2</td>
<td align="center">10,000</td>
<td align="center">84.33</td>
</tr>
<tr>
<td align="left">RFC</td>
<td align="center">CV2</td>
<td align="center">18,000</td>
<td align="center">87.04</td>
</tr>
<tr>
<td align="left">KNN</td>
<td align="center">MIC</td>
<td align="center">60</td>
<td align="center">85.77</td>
</tr>
<tr>
<td align="left">XGBoost</td>
<td align="center">MIC</td>
<td align="center">210</td>
<td align="center">94.33</td>
</tr>
<tr>
<td align="left">SVM</td>
<td align="center">MIC</td>
<td align="center">660</td>
<td align="center">93.72</td>
</tr>
<tr>
<td align="left">RFC</td>
<td align="center">MIC</td>
<td align="center">4,800</td>
<td align="center">87.79</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Performance evaluation of different feature selection combined with machine learning schemes (Test dataset).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="center">Feature selection</th>
<th align="center">No. of features</th>
<th align="center">Accuracy (%)</th>
<th align="center">Precision (%)</th>
<th align="center">Recall (%)</th>
<th align="center">F1-measure (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">KNN</td>
<td align="center">PCA</td>
<td align="center">360</td>
<td align="center">45.23</td>
<td align="center">45.64</td>
<td align="center">46.06</td>
<td align="center">45.47</td>
</tr>
<tr>
<td align="left">XGBoost</td>
<td align="center">PCA</td>
<td align="center">10,000</td>
<td align="center">92.89</td>
<td align="center">92.13</td>
<td align="center">92.04</td>
<td align="center">92.33</td>
</tr>
<tr>
<td align="left">SVM</td>
<td align="center">PCA</td>
<td align="center">14,000</td>
<td align="center">87.05</td>
<td align="center">87.50</td>
<td align="center">86.61</td>
<td align="center">86.92</td>
</tr>
<tr>
<td align="left">RFC</td>
<td align="center">PCA</td>
<td align="center">4,800</td>
<td align="center">87.23</td>
<td align="center">87.36</td>
<td align="center">86.97</td>
<td align="center">87.23</td>
</tr>
<tr>
<td align="left">KNN</td>
<td align="center">CV2</td>
<td align="center">760</td>
<td align="center">27.41</td>
<td align="center">27.23</td>
<td align="center">27.02</td>
<td align="center">26.51</td>
</tr>
<tr>
<td align="left">XGBoost</td>
<td align="center">CV2</td>
<td align="center">10,000</td>
<td align="center">91.83</td>
<td align="center">92.12</td>
<td align="center">92.04</td>
<td align="center">92.00</td>
</tr>
<tr>
<td align="left">SVM</td>
<td align="center">CV2</td>
<td align="center">10,000</td>
<td align="center">82.41</td>
<td align="center">84.35</td>
<td align="center">82.57</td>
<td align="center">82.90</td>
</tr>
<tr>
<td align="left">RFC</td>
<td align="center">CV2</td>
<td align="center">18,000</td>
<td align="center">86.33</td>
<td align="center">87.86</td>
<td align="center">86.68</td>
<td align="center">86.85</td>
</tr>
<tr>
<td align="left">KNN</td>
<td align="center">MIC</td>
<td align="center">60</td>
<td align="center">86.91</td>
<td align="center">88.64</td>
<td align="center">87.06</td>
<td align="center">87.47</td>
</tr>
<tr>
<td align="left">XGBoost</td>
<td align="center">MIC</td>
<td align="center">210</td>
<td align="center">94.08</td>
<td align="center">94.24</td>
<td align="center">94.26</td>
<td align="center">94.24</td>
</tr>
<tr>
<td align="left">SVM</td>
<td align="center">MIC</td>
<td align="center">660</td>
<td align="center">93.51</td>
<td align="center">93.80</td>
<td align="center">93.74</td>
<td align="center">93.74</td>
</tr>
<tr>
<td align="left">RFC</td>
<td align="center">MIC</td>
<td align="center">4,800</td>
<td align="center">87.13</td>
<td align="center">88.31</td>
<td align="center">87.46</td>
<td align="center">87.62</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s1-2">
<title>Performance of ScnML on the test dataset</title>
<p>To further validate the robustness of the model, receiver operating characteristic (ROC) curves and confusion matrices were used to evaluate the prediction performance of ScnML. We observe that the AUC of the ScnML model is 0.96 (<xref ref-type="fig" rid="F3">Figure 3A</xref>). The confusion matrix validates the predictive performance of the model for each type of spinal cord neural subpopulation, and the low misclassification rate demonstrates the robustness of the model (<xref ref-type="fig" rid="F3">Figure 3B</xref>). In addition, Uniform Manifold Approximation and Projection (UMAP) of 6,000 single cells revealed that the overall performance of the 210 marker genes was significantly better than that of all genes (<xref ref-type="fig" rid="F3">Figures 3C, D</xref>). In particular, the samples from different categories were almost blended together in the clustering process that exploited all genes (<xref ref-type="fig" rid="F3">Figure 3C</xref>). However, employing the 210 optimal genes generates a distinct distribution of cell subpopulations, demonstrating clear clustering findings (<xref ref-type="fig" rid="F3">Figure 3D</xref>). We also performed heat map clustering analysis on the ScnML gene set and obtained excellent clustering results, demonstrating the advantages of machine learning (<xref ref-type="sec" rid="s10">Supplementary Figure S1</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Predictive performance of ScnML. <bold>(A)</bold> The Receiver Operating Characteristic (ROC) curves for the ScnML model evaluated on the training dataset. <bold>(B)</bold> Confusion matrix for ScnML, used to assess the predictive performance of the model for each cell subpopulation classification. <bold>(C)</bold> UMAP shows clustering performance for six spinal cord nervous cell subpopulations at all gene set levels. <bold>(D)</bold> UMAP shows clustering performance for six spinal cord nervous cell subpopulations at ScnML gene set levels.</p>
</caption>
<graphic xlink:href="fgene-15-1413484-g003.tif"/>
</fig>
</sec>
<sec id="s1-3">
<title>Gene function analysis</title>
<p>We performed functional enrichment analysis of the ScnML gene set to explore biological processes related to SCI pathophysiology and potential recovery mechanisms. The analysis revealed significant enrichment in genes associated with axon ensheathment, myelination, and the ensheathment of neurons, highlighting the pivotal role of myelin repair and axonal regeneration post-injury (<xref ref-type="bibr" rid="B8">Franklin and Ffrench-Constant, 2008</xref>; <xref ref-type="bibr" rid="B12">Lee et al., 2012</xref>) (<xref ref-type="sec" rid="s10">Supplementary Figure S2</xref>). Additionally, processes such as glial cell differentiation and gliogenesis were prominently featured, underscoring the importance of glial responses in scar formation and neural tissue remodeling (<xref ref-type="bibr" rid="B20">Sofroniew, 2009</xref>). Importantly, our findings also highlight the regulation of cell-substrate adhesion and leukocyte migration, including myeloid cells, as key components in the inflammatory response and subsequent healing processes (<xref ref-type="sec" rid="s10">Supplementary Figures S3, S4</xref>). The modulation of cell adhesion dynamics is particularly critical, as it influences axonal growth and neural cell interaction with the extracellular matrix, which are essential for effective nerve repair (<xref ref-type="bibr" rid="B32">Zhu et al., 2015</xref>).</p>
</sec>
<sec id="s1-4">
<title>Expression analysis of the ScnML gene set</title>
<p>In addition, we explored the representation of the 210 marker genes in the biological landscape. We identified potential marker genes such as Atp1a2, which is highly expressed in astrocytes; C1qa and Ly86, which are specifically expressed in microglia; and Vtn, which characterizes a subpopulation of endothelial cells (<xref ref-type="fig" rid="F4">Figure 4A</xref>). These genes have been verifiably reported. Furthermore, the use of multiple genes to characterize cellular subpopulations improves accuracy. For instance, Meg3, Snhg11, and Malat1 ensure the identification of neuron subpopulations; Atp1a2, Cst3, and Dbi are highly expressed in astrocytes; and Cst3, C1qb, and Ctss exhibit high expression levels in microglia (<xref ref-type="fig" rid="F4">Figure 4B</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Computational analysis of ScnML gene set. <bold>(A)</bold> UMAP shows reported marker genes for spinal cord neuronal cell subpopulations. <bold>(B)</bold> Violin plot shows potential marker genes for subpopulations of spinal cord nerve cells.</p>
</caption>
<graphic xlink:href="fgene-15-1413484-g004.tif"/>
</fig>
<p>We analyzed single-cell expression profiles containing all genes and separately, the 210 genes, as the basis for constructing a partition-based graph abstraction (PAGA) to describe the spinal cord neuronal cell bioscape. Both displayed the same topological structure, such as a tight association between microglia and astrocytes, indicating that ScnML screened for key molecular markers and removed redundant information (<xref ref-type="fig" rid="F5">Figures 5A, B</xref>). We utilized Scanpy to compare the expression levels of the top 20 genes in each cell subpopulation with their expression levels in the other five clusters. For example, the expression levels of Cst3, Dbi, and Malat1 in the astrocyte subpopulation were each higher than the combined totals from the remaining five cellular subpopulations. In the neuron cell subpopulation, Meg3, Snhg11, and Malat1 showed high levels of expression, suggesting their potential as marker genes (<xref ref-type="fig" rid="F5">Figure 5C</xref> and <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>). These results indicate that ScnML possesses irreplaceable advantages in processing scRNA-seq data without relying on prior biological knowledge.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>
<bold>(A, B)</bold> Expression trajectory analysis of 210 marker genes (downward) and all genes (upward) of spinal cord nerve cell subpopulations colored by cell type using PAGA. The thicker the line, the closer the cell connection. <bold>(C)</bold> Comparison of marker genes selected by ScnML (210 marker genes) using split violin plots. The expression level of marker genes in specific cells is shown on the left (Blue), and the total expression level in the remaining five cell types is shown on the right (Orange).</p>
</caption>
<graphic xlink:href="fgene-15-1413484-g005.tif"/>
</fig>
</sec>
</sec>
<sec id="s2" sec-type="conclusion">
<title>Conclusion</title>
<p>Single-cell sequencing technology has been extensively used in both basic science research and the clinical setting, promoting the exploration of cellular differentiation and molecular heterogeneity. In this research, we designed and developed a machine learning-based predictive model, ScnML, for predicting spinal cord nerve cell subpopulations. ScnML addresses the computational inefficiencies and overfitting problems caused by high-dimensional feature spaces, thereby improving the model&#x2019;s prediction accuracy and robustness. Results from an independent dataset show that ScnML outperformed other methods, achieving an accuracy of 94.08% and an area under the ROC curve (AUC) of 0.96. More significantly, through the analysis of the ScnML model, we have successfully identified a set of key genes that can be utilized as reliable biomarkers for spinal cord neuronal cell subpopulations. This discovery provides an important molecular tool for deeper comprehension of spinal cord nerve cells&#x2019; intricacies, with far-reaching impacts on future neurobiology research.</p>
</sec>
<sec id="s3" sec-type="methods">
<title>Methods</title>
<sec id="s3-1">
<title>Dataset construction and preprocessing</title>
<p>The single-cell transcriptome dataset of crush-injured adult mouse spinal cord that supports the findings of this study is available in figshare with the identifier (<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.6084/m9.figshare.17702045">https://doi.org/10.6084/m9.figshare.17702045</ext-link>) (<xref ref-type="bibr" rid="B13">Li et al., 2022</xref>). Based on the same processing method used by Li et al., the raw sequence data were aligned to the mm10 (Ensembl 84) reference genome and cell numbers and unique molecular identifiers (UMIs) were estimated using CellRanger (3.1.0). The 6,000 single-cell transcriptome samples were used to classify six spinal cord injury cell subpopulations, including Endothelial, Astrocyte, Microglia, Neuron, Oligodendrocyte (ODC), and Pericyte cells. These single-cell transcriptome samples were randomly divided into a 4800-sample training set and a 2200-sample testing set with a ratio of 7:3. To construct a stringent and robust benchmark dataset, we applied a filtration criterion, excluding genes with unique feature counts of zero or less. This process yielded a final set of 27,998 genes, each expressed in at least one of the 6,000 cells surveyed.</p>
</sec>
<sec id="s3-2">
<title>Mutual information coefficient</title>
<p>The Mutual Information Coefficient (MIC) is predicated on the idea that the presence of a relationship between two variables allows for the construction of a grid that effectively partitions their scatter plot, encapsulating the essence of their interaction. To enable equitable comparisons across grids of different sizes, the mutual information values derived from these partitions are normalized. This normalization ensures a consistent framework for evaluating the strength and complexity of relationships between variables, irrespective of their scale or the intricacy of their association (<xref ref-type="bibr" rid="B31">Zhou et al., 2004</xref>; <xref ref-type="bibr" rid="B19">Reshef et al., 2011</xref>; <xref ref-type="bibr" rid="B2">Albanese et al., 2012</xref>).<disp-formula id="e1">
<mml:math id="m1">
<mml:mi>I</mml:mi>
<mml:mfenced separators="&#x7c;" open="(" close=")">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x003D;</mml:mo>
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>p</mml:mi>
<mml:mfenced separators="&#x7c;" open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mi mathvariant="italic">log</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mfenced separators="&#x7c;" open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mfenced separators="&#x7c;" open="(" close=")">
<mml:mi>x</mml:mi>
</mml:mfenced>
<mml:mi>p</mml:mi>
<mml:mfenced separators="&#x7c;" open="(" close=")">
<mml:mi>y</mml:mi>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mfenced separators="&#x7c;" open="(" close=")">
<mml:mi>X</mml:mi>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mfenced separators="&#x7c;" open="(" close=")">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(1)</label>
</disp-formula>Where <inline-formula id="inf1">
<mml:math id="m2">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, representing the mutual information entropy, is a measure of the information about variable <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mrow>
<mml:mfenced close=")" open="(" separators="&#x7c;">
<mml:mrow>
<mml:mtext>or&#x2009;</mml:mtext>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> contained in variable <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mrow>
<mml:mfenced close=")" open="(" separators="&#x7c;">
<mml:mrow>
<mml:mtext>or&#x2009;</mml:mtext>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s3-3">
<title>Biological analysis</title>
<p>We performed an extensive analysis to assess the representational capability of 210 marker genes in identifying cell subpopulations. The clustering analyses were performed using the Scanpy software (version 1.9.1), and default parameters were used for all analyses (<xref ref-type="bibr" rid="B26">Wolf et al., 2018</xref>). Partition-based graph abstraction (PAGA) was also implemented via Scanpy, while uniform manifold approximation and projection (UMAP) visualizations were generated using the umap-learn Python package (version 0.3.9), with parameters set to default values. Furthermore, functional enrichment analysis was executed employing the enrichGO function from the clusterProfiler package (version 4.6.2).</p>
</sec>
<sec id="s3-4">
<title>eXtreme Gradient Boosting</title>
<p>eXtreme Gradient Boosting (XGBoost) is a highly sophisticated and efficient machine learning algorithm that has gained widespread recognition for its efficiency and effectiveness in various predictive modeling competitions (<xref ref-type="bibr" rid="B5">Chen and Guestrin, 2016</xref>; <xref ref-type="bibr" rid="B24">Wang et al., 2023b</xref>). It operates by constructing a series of decision trees in a sequential manner, where each subsequent tree aims to correct the errors of its predecessors. This approach enables the model to learn complex patterns in the data, enhancing its predictive accuracy. One of the key strengths of XGBoost is its ability to handle large datasets with speed and precision, making it an ideal choice for our study. In addition, compared to models such as KNN and SVM, XGBoost provides a direct way to evaluate the importance of each input variable.</p>
</sec>
<sec id="s3-5">
<title>Model evaluation</title>
<p>Four classic metrics were used to quantify the performance of model predictions, namely Accuracy, Recall, Precision, and F1 measure, defined as (<xref ref-type="bibr" rid="B10">Fu et al., 2019</xref>; <xref ref-type="bibr" rid="B23">Wang et al., 2021b</xref>; <xref ref-type="bibr" rid="B11">Joshi et al., 2021</xref>; <xref ref-type="bibr" rid="B14">Liang et al., 2021</xref>; <xref ref-type="bibr" rid="B22">Wang et al., 2023c</xref>; <xref ref-type="bibr" rid="B17">Liu et al., 2023</xref>; <xref ref-type="bibr" rid="B18">Qian et al., 2023</xref>):<disp-formula id="e2">
<mml:math id="m5">
<mml:mrow>
<mml:mtext>Accuracy</mml:mtext>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
<disp-formula id="e3">
<mml:math id="m6">
<mml:mrow>
<mml:mtext>Recall</mml:mtext>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<disp-formula id="e4">
<mml:math id="m7">
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m8">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mtext>&#x2009;measure</mml:mtext>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#x00D7;</mml:mo>
<mml:mrow>
<mml:mfenced close=")" open="(" separators="&#x7c;">
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#x00D7;</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#x002B;</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>Where <inline-formula id="inf4">
<mml:math id="m9">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mtext>&#x2009;and&#x2009;</mml:mtext>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represent the numbers of true positives, true negatives, false positives and false negatives, respectively. In addition, the receiver operating characteristic (ROC) curve was used to evaluate the performance of ScnML (<xref ref-type="bibr" rid="B29">Zeng et al., 2016</xref>; <xref ref-type="bibr" rid="B33">Zulfiqar et al., 2024</xref>).</p>
</sec>
</sec>
</body>
<back>
<sec id="s4" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s5">
<title>Ethics statement</title>
<p>Ethical approval was not required for the study involving animals in accordance with the local legislation and institutional requirements because all data used in this study are from public databases.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>LL: Writing&#x2013;original draft. YH: Writing&#x2013;original draft, Data curation, Formal Analysis. YZ: Formal Analysis, Writing&#x2013;original draft, Validation. YL: Writing&#x2013;original draft, Data curation. SM: Writing&#x2013;original draft, Project administration. QW: Project administration, Writing&#x2013;original draft.</p>
</sec>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The authors declare that no financial support was received for the research, authorship, and/or publication of this article.</p>
</sec>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2024.1413484/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2024.1413484/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table5.DOCX" id="SM1" mimetype="application/DOCX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table3.DOCX" id="SM2" mimetype="application/DOCX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al-Allak</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bertelli</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lewis</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Random forests: the new generation of machine learning algorithms to predict survival in breast cancer</article-title>. <source>Brit J. Surg.</source> <volume>100</volume>, <fpage>47</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijsu.2013.06.112</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Albanese</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Filosi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Visintainer</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Riccadonna</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jurman</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Furlanello</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Minerva and minepy: a C engine for the MINE suite and its R, Python and MATLAB wrappers</article-title>. <source>Bioinformatics</source> <volume>29</volume> (<issue>3</issue>), <fpage>407</fpage>&#x2013;<lpage>408</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bts707</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alizadeh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dyck</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Karimi-Abdolrezaee</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Traumatic spinal cord injury: an overview of pathophysiology, models and acute injury mechanisms</article-title>. <source>Front. Neurol.</source> <volume>10</volume>, <fpage>282</fpage>. <pub-id pub-id-type="doi">10.3389/fneur.2019.00282</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Single-cell RNA sequencing for traumatic spinal cord injury</article-title>. <source>FASEB J.</source> <volume>36</volume> (<issue>12</issue>), <fpage>e22656</fpage>. <pub-id pub-id-type="doi">10.1096/fj.202200943R</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>T. Q.</given-names>
</name>
<name>
<surname>Guestrin</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>XGBoost: a scalable tree boosting system</article-title>,&#x201d; in <source>KDD &#x27;16: proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining</source>, <fpage>785</fpage>&#x2013;<lpage>794</lpage>. <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cortes</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Vapnik</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Support-vector networks</article-title>. <source>Mach. Learn.</source> <volume>20</volume> (<issue>3</issue>), <fpage>273</fpage>&#x2013;<lpage>297</lpage>. <pub-id pub-id-type="doi">10.1007/bf00994018</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Delile</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Rayon</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Melchionda</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Edwards</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Briscoe</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sagner</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Single cell transcriptomics reveals spatial and temporal dynamics of gene expression in the developing mouse spinal cord</article-title>. <source>Development</source> <volume>146</volume> (<issue>12</issue>), <fpage>dev173807</fpage>. <pub-id pub-id-type="doi">10.1242/dev.173807</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Franklin</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>ffrench-Constant</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Remyelination in the CNS: from biology to therapy</article-title>. <source>Nat. Rev. Neurosci.</source> <volume>9</volume> (<issue>11</issue>), <fpage>839</fpage>&#x2013;<lpage>855</lpage>. <pub-id pub-id-type="doi">10.1038/nrn2480</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Exercise training promotes functional recovery after spinal cord injury</article-title>. <source>Neural Plast.</source> <volume>2016</volume>, <fpage>4039580</fpage>. <pub-id pub-id-type="doi">10.1155/2016/4039580</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Improved pre-miRNAs identification through mutual information of pre-miRNA sequences and structures</article-title>. <source>Front. Genet.</source> <volume>10</volume>, <fpage>119</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2019.00119</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Joshi</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Masilamani</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Ramesh</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An ensembled SVM based approach for predicting adverse drug reactions</article-title>. <source>Curr. Bioinforma.</source> <volume>16</volume> (<issue>3</issue>), <fpage>422</fpage>&#x2013;<lpage>432</lpage>. <pub-id pub-id-type="doi">10.2174/1574893615999200707141420</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Morrison</surname>
<given-names>B. M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lengacher</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Farah</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Hoffman</surname>
<given-names>P. N.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Oligodendroglia metabolically support axons and contribute to neurodegeneration</article-title>. <source>Nature</source> <volume>487</volume> (<issue>7408</issue>), <fpage>443</fpage>&#x2013;<lpage>448</lpage>. <pub-id pub-id-type="doi">10.1038/nature11314</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Shao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Temporal and spatial cellular and molecular pathological alterations with single-cell resolution in the adult spinal cord after injury</article-title>. <source>Signal Transduct. Target Ther.</source> <volume>7</volume> (<issue>1</issue>), <fpage>65</fpage>. <pub-id pub-id-type="doi">10.1038/s41392-022-00885-4</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>P. F.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>W. R. T.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>Y. C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>HelPredictor models single-cell transcriptome to predict human embryo lineage allocation</article-title>. <source>Brief. Bioinform</source> <volume>22</volume> (<issue>6</issue>), <fpage>bbab196</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbab196</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liau</surname>
<given-names>E. S.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y. C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>W. S.</given-names>
</name>
<name>
<surname>Calon</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Nedelec</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2023a</year>). <article-title>Single-cell transcriptomic analysis reveals diversity within mammalian spinal motor neurons</article-title>. <source>Nat. Commun.</source> <volume>14</volume> (<issue>1</issue>), <fpage>46</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-022-35574-x</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liau</surname>
<given-names>E. S.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>S. Q.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y. C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>W. S.</given-names>
</name>
<name>
<surname>Calon</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Nedelec</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2023b</year>). <article-title>Single-cell transcriptomic analysis reveals diversity within mammalian spinal motor neurons</article-title>. <source>Nat. Commun.</source> <volume>14</volume> (<issue>1</issue>), <fpage>46</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-022-35574-x</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>M. Z.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xi</surname>
<given-names>QLMG</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>Y. C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H. C.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>P. F.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>A computational framework of routine test data for the cost-effective chronic disease prediction</article-title>. <source>Brief. Bioinform</source> <volume>24</volume> (<issue>2</issue>), <fpage>bbad054</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbad054</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Multi-view kernel sparse representation for identification of membrane protein types</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>20</volume> (<issue>2</issue>), <fpage>1234</fpage>&#x2013;<lpage>1245</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2022.3191325</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reshef</surname>
<given-names>D. N.</given-names>
</name>
<name>
<surname>Reshef</surname>
<given-names>Y. A.</given-names>
</name>
<name>
<surname>Finucane</surname>
<given-names>H. K.</given-names>
</name>
<name>
<surname>Grossman</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>McVean</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Turnbaugh</surname>
<given-names>P. J.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Detecting novel associations in large data sets</article-title>. <source>Science</source> <volume>334</volume>, <fpage>1518</fpage>&#x2013;<lpage>1524</lpage>. <pub-id pub-id-type="doi">10.1126/science.1205438</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sofroniew</surname>
<given-names>M. V.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Molecular dissection of reactive astrogliosis and glial scar formation</article-title>. <source>Trends Neurosci.</source> <volume>32</volume> (<issue>12</issue>), <fpage>638</fpage>&#x2013;<lpage>647</lpage>. <pub-id pub-id-type="doi">10.1016/j.tins.2009.08.002</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021a</year>). <article-title>eHSCPr discriminating the cell identity involved in endothelial to hematopoietic transition</article-title>. <source>Bioinformatics</source> <volume>37</volume> (<issue>15</issue>), <fpage>2157</fpage>&#x2013;<lpage>2164</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btab071</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y. N.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y. Q.</given-names>
</name>
<etal/>
</person-group> (<year>2023c</year>). <article-title>NRTPredictor: identifying rice root cell state in single-cell RNA-seq via ensemble learning</article-title>. <source>Plant Methods</source> <volume>19</volume> (<issue>1</issue>), <fpage>119</fpage>. <pub-id pub-id-type="doi">10.1186/s13007-023-01092-0</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xi</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021b</year>). <article-title>IHEC_RAAC: a online platform for identifying human enzyme classes via reduced amino acid cluster strategy</article-title>. <source>Amino Acids</source> <volume>53</volume> (<issue>2</issue>), <fpage>239</fpage>&#x2013;<lpage>251</lpage>. <pub-id pub-id-type="doi">10.1007/s00726-021-02941-9</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H. C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J. Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H. S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>M. Z.</given-names>
</name>
<etal/>
</person-group> (<year>2023b</year>). <article-title>A cost-effective machine learning-based method for preeclampsia risk assessment and driver genes discovery</article-title>. <source>Cell Biosci.</source> <volume>13</volume> (<issue>1</issue>), <fpage>41</fpage>. <pub-id pub-id-type="doi">10.1186/s13578-023-00991-y</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>Single-cell RNA sequencing in orthopedic research</article-title>. <source>Bone Res.</source> <volume>11</volume> (<issue>1</issue>), <fpage>10</fpage>. <pub-id pub-id-type="doi">10.1038/s41413-023-00245-0</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wolf</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Angerer</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Theis</surname>
<given-names>F. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>SCANPY: large-scale single-cell gene expression data analysis</article-title>. <source>Genome Biol.</source> <volume>19</volume> (<issue>1</issue>), <fpage>15</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-017-1382-0</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>EWAS Data Hub: a resource of DNA methylation array data and metadata</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume> (<issue>D1</issue>), <fpage>D890</fpage>&#x2013;<lpage>D895</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz840</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>EWAS Open Platform: integrated data, knowledge and toolkit for epigenome-wide association study</article-title>. <source>Nucleic Acids Res.</source> <volume>50</volume> (<issue>D1</issue>), <fpage>D1004</fpage>&#x2013;<lpage>D1009</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkab972</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Integrative approaches for predicting microRNA function and prioritizing disease-related microRNA using biological interaction networks</article-title>. <source>Brief. Bioinform</source> <volume>17</volume> (<issue>2</issue>), <fpage>193</fpage>&#x2013;<lpage>203</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbv033</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Noah</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>McPartland</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Hirsch</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Support vector machine prediction of individual Autism Diagnostic Observation Schedule (ADOS) scores based on neural responses during live eye-to-eye contact</article-title>. <source>Sci. Rep.</source> <volume>14</volume> (<issue>1</issue>), <fpage>3232</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-53942-z</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Dougherty</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Russ</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Suh</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Gene clustering based on clusterwide mutual information</article-title>. <source>J. Comput. Biol.</source> <volume>11</volume> (<issue>1</issue>), <fpage>147</fpage>&#x2013;<lpage>161</lpage>. <pub-id pub-id-type="doi">10.1089/106652704773416939</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Soderblom</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Trojanowsky</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>D. H.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>J. K.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Fibronectin matrix assembly after spinal cord injury</article-title>. <source>J. Neurotrauma</source> <volume>32</volume> (<issue>15</issue>), <fpage>1158</fpage>&#x2013;<lpage>1167</lpage>. <pub-id pub-id-type="doi">10.1089/neu.2014.3703</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zulfiqar</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Ahmed</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Deep-STP: a deep learning-based approach to predict snake toxin proteins by using word embeddings</article-title>. <source>Front. Med.</source> <volume>10</volume>, <fpage>1291352</fpage>. <pub-id pub-id-type="doi">10.3389/fmed.2023.1291352</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>