<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article article-type="methods-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Cell Dev. Biol.</journal-id>
<journal-title>Frontiers in Cell and Developmental Biology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Cell Dev. Biol.</abbrev-journal-title>
<issn pub-type="epub">2296-634X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1149535</article-id>
<article-id pub-id-type="doi">10.3389/fcell.2023.1149535</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Cell and Developmental Biology</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>EdeepSADPr: an extensive deep-learning architecture for prediction of the <italic>in situ</italic> crosstalks of serine phosphorylation and ADP-ribosylation</article-title>
<alt-title alt-title-type="left-running-head">Jiang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fcell.2023.1149535">10.3389/fcell.2023.1149535</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Jiang</surname>
<given-names>Haoqiang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2232449/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shang</surname>
<given-names>Shipeng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1887884/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sha</surname>
<given-names>Yutong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Lin</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2181553/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>He</surname>
<given-names>Ningning</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/826502/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Lei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1021282/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Basic Medicine</institution>, <institution>Qingdao University</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Sino Genomics Technology Co.</institution>, <institution>Ltd.</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>College of Computer Science and Technology</institution>, <institution>Qingdao University</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Faculty of Biomedical and Rehabilitation Engineering</institution>, <institution>University of Health and Rehabilitation Sciences</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/102594/overview">Utpal Ghosh</ext-link>, University of Kalyani, India</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/643203/overview">Jia Meng</ext-link>, Xi&#x2019;an Jiaotong-Liverpool University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/730660/overview">Xiaojian Shao</ext-link>, National Research Council Canada (NRC), Canada</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Lei Li, <email>lileime@hotmail.com</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work and share first authorship</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>28</day>
<month>04</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>11</volume>
<elocation-id>1149535</elocation-id>
<history>
<date date-type="received">
<day>22</day>
<month>01</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>04</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Jiang, Shang, Sha, Zhang, He and Li.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Jiang, Shang, Sha, Zhang, He and Li</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The <italic>in situ</italic> post-translational modification (PTM) crosstalk refers to the interactions between different types of PTMs that occur on the same residue site of a protein. The crosstalk sites generally have different characteristics from those with the single PTM type. Studies targeting the latter&#x2019;s features have been widely conducted, while studies on the former&#x2019;s characteristics are rare. For example, the characteristics of serine phosphorylation (pS) and serine ADP-ribosylation (SADPr) have been investigated, whereas those of their <italic>in situ</italic> crosstalks (pSADPr) are unknown. In this study, we collected 3,250 human pSADPr, 7,520 SADPr, 151,227&#xa0;pS and 80,096 unmodified serine sites and explored the features of the pSADPr sites. We found that the characteristics of pSADPr sites are more similar to those of SADPr compared to pS or unmodified serine sites. Moreover, the crosstalk sites are likely to be phosphorylated by some kinase families (e.g., AGC, CAMK, STE and TKL) rather than others (e.g., CK1 and CMGC). Additionally, we constructed three classifiers to predict pSADPr sites from the pS dataset, the SADPr dataset and the protein sequences separately. We built and evaluated five deep-learning classifiers in ten-fold cross-validation and independent test datasets. We also used the classifiers as base classifiers to develop a few stacking-based ensemble classifiers to improve performance. The best classifiers had the AUC values of 0.700, 0.914 and 0.954 for recognizing pSADPr sites from the SADPr, pS and unmodified serine sites, respectively. The lowest prediction accuracy was achieved by separating pSADPr and SADPr sites, which is consistent with the observation that pSADPr&#x2019;s characteristics are more similar to those of SADPr than the rest. Finally, we developed an online tool for extensively predicting human pSADPr sites based on the CNN<sub>OH</sub> classifier, dubbed EdeepSADPr. 
It is freely available through <ext-link ext-link-type="uri" xlink:href="http://edeepsadpr.bioinfogo.org/">http://edeepsadpr.bioinfogo.org/</ext-link>. We expect our investigation will promote a comprehensive understanding of crosstalks.</p>
</abstract>
<kwd-group>
<kwd>proteomics</kwd>
<kwd>machine learning and AI</kwd>
<kwd>post-translational modification (PTM)</kwd>
<kwd>phosphorylation</kwd>
<kwd>prediction model</kwd>
<kwd>ADP-ribosylation</kwd>
<kwd>bioinformatics</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>The <italic>in situ</italic> post-translational modification (PTM) crosstalk refers to the interactions between different types of PTMs that occur on the same residue site of a protein. Different PTM types on the same site have different effects on the activity, stability, localization, and interactions of the modified protein (<xref ref-type="bibr" rid="B29">Yang and Gregoire, 2006</xref>; <xref ref-type="bibr" rid="B10">Hunter, 2007</xref>; <xref ref-type="bibr" rid="B20">Swaney et al., 2013</xref>; <xref ref-type="bibr" rid="B28">Xu et al., 2018</xref>). The crosstalk sites generally have different characteristics from those with the single PTM type; nevertheless, the former is rarely investigated compared to the latter. This study focused on the crosstalk between serine phosphorylation (pS) and ADP-ribosylation (SADPr). Serine phosphorylation, catalyzed by hundreds of kinases, plays a regulatory role in the cell cycle, growth, apoptosis, and signal transduction (<xref ref-type="bibr" rid="B33">Zolnierowicz and Bollen, 2000</xref>). Serine ADP-ribosylation, catalyzed by over twenty ADP-ribosyltransferases (<xref ref-type="bibr" rid="B14">Luscher et al., 2018</xref>), regulates many cellular processes, including chromatin organization, epigenetic transcription regulation, cell differentiation and cytoplasm stress response (<xref ref-type="bibr" rid="B17">Nowak et al., 2020</xref>; <xref ref-type="bibr" rid="B2">Brustel et al., 2022</xref>). Both serine modifications can co-occur on the same residue on a competitive basis as the <italic>in situ</italic> PTM crosstalk (dubbed pSADPr). This crosstalk represents a significantly high degree of overlap, similar to the site-specific crosstalk between lysine acetylation and ubiquitylation (<xref ref-type="bibr" rid="B11">Larsen et al., 2018</xref>). 
Identification of PTM crosstalk sites has emerged to be an intriguing topic and attracted much attention, relevant works of which have been ongoing before our study (<xref ref-type="bibr" rid="B18">Peng et al., 2014</xref>; <xref ref-type="bibr" rid="B22">Venne et al., 2014</xref>; <xref ref-type="bibr" rid="B27">Xu et al., 2021</xref>). For example, the classifier mUSP was developed to predict <italic>in situ</italic> crosstalk sites of ubiquitylation and SUMOylation (<xref ref-type="bibr" rid="B27">Xu et al., 2021</xref>). Nevertheless, the <italic>in situ</italic> crosstalk of serine phosphorylation and ADP-ribosylation has not been investigated. Although a few <italic>in silico</italic> classifiers have been developed for predicting pS and SADPr sites (<xref ref-type="bibr" rid="B13">Luo et al., 2019</xref>; <xref ref-type="bibr" rid="B19">Sha et al., 2021</xref>), the classifier for predicting pSADPr sites is unavailable.</p>
<p>
<xref ref-type="fig" rid="F1">Figure 1</xref> showed the overview map of this study. This study collected 3,250 human pSADPr, 151,227&#xa0;pS, 7,520 SADPr and 80,096 unmodified serine sites. Accordingly, we investigated the characteristics of pSADPr and constructed classifiers to predict pSADPr sites. We found that pSADPr&#x2019;s characteristics are more similar to those of SADPr than pS and unmodified serine sites. We also found that pSADPr sites were preferred to be phosphorylated by four subfamilies of serine kinases (i.e., AGC, CAMK, STE and TKL). Moreover, we built and evaluated five deep-learning classifiers in ten-fold cross-validation and independent test datasets. We also developed a few advanced stacking-based ensemble classifiers. The best classifiers had the AUC values of 0.700, 0.914 and 0.954 for recognizing pSADPr sites from the SADPr, pS and unmodified serine sites. Finally, we developed an online tool for extensively predicting human pSADPr sites, dubbed EdeepSADPr. It is freely available through <ext-link ext-link-type="uri" xlink:href="http://edeepsadpr.bioinfogo.org/">http://edeepsadpr.bioinfogo.org/</ext-link>. We anticipate that accurate prediction by EdeepSADPr will facilitate the discovery of new pSADPr sites and promote the understanding of their functional characteristics.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Flowchart of the model construction.</p>
</caption>
<graphic xlink:href="fcell-11-1149535-g001.tif"/>
</fig>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Data collection and preprocessing</title>
<p>
<xref ref-type="fig" rid="F2">Figure 2</xref> shows the procedure of dataset construction and preprocessing. 7,520 human SADPr sites with high confidence (i.e., ADPr peptides with Andromeda scores &#x3e;40 and localization probability &#x3e;0.75) were collected from the literature (<xref ref-type="bibr" rid="B11">Larsen et al., 2018</xref>; <xref ref-type="bibr" rid="B7">Hendriks et al., 2019</xref>; <xref ref-type="bibr" rid="B3">Buch-Larsen et al., 2020</xref>; <xref ref-type="bibr" rid="B17">Nowak et al., 2020</xref>) (<xref ref-type="fig" rid="F2">Figure 2A</xref>). 151,227 human pS sites were obtained from the database PhosphositePlus (<xref ref-type="bibr" rid="B8">Hornbeck et al., 2012</xref>) and the literature (<xref ref-type="bibr" rid="B13">Luo et al., 2019</xref>) (<xref ref-type="fig" rid="F2">Figure 2A</xref>). We compared both datasets and found 3,250 pSADPr peptides, 147,977&#xa0;pS peptides, and 4,270 SADPr peptides. We also collected 80,096 unmodified serine (UM) sites after removing modified serine sites (i.e., pSADPr, SADPr and pS) from the reported dataset (<xref ref-type="bibr" rid="B13">Luo et al., 2019</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Schematic diagram of data collection and preprocessing for human pSADPr datasets. Construction of the pSADPr, pS and SADPr datasets <bold>(A)</bold>. The construction and preprocessing of the pSADPr-SADPr dataset <bold>(B)</bold>, the pSADPr-pS dataset <bold>(C)</bold> and the pSADPr-UM dataset <bold>(D)</bold>. UM stands for unmodified serine.</p>
</caption>
<graphic xlink:href="fcell-11-1149535-g002.tif"/>
</fig>
<p>Each serine site of the above datasets was represented by a 41-residue-long sequence segment with the serine at the center (<xref ref-type="bibr" rid="B19">Sha et al., 2021</xref>). CD-HIT (<xref ref-type="bibr" rid="B12">Li and Godzik, 2006</xref>; <xref ref-type="bibr" rid="B9">Huang et al., 2010</xref>) was applied to eliminate the homologous peptides by setting the threshold to 60% sequence identity, which is valuable for avoiding overestimation. Specifically, we combined the pSADPr peptides with SADPr peptides, pS peptides, and UM peptides, respectively, and clustered them using CD-HIT. Accordingly, we obtained 4,959 clusters, 30,106 clusters and 66,526 clusters. We selected one sequence randomly from each cluster according to the criterion: One pSADPr peptide was selected if it was included in the cluster; otherwise, one of the other peptides was selected. After that, 2,378 pSADPr, 2,581 SADPr, 27,728&#xa0;pS and 64,148 UM peptides were collected (<xref ref-type="fig" rid="F2">Figures 2B&#x2013;D</xref>). Furthermore, each of the three datasets was divided into 11 groups, where ten groups were used as a cross-validation dataset, and the remaining group was considered an independent test dataset (<xref ref-type="fig" rid="F2">Figures 2B&#x2013;D</xref>). It should be noted that if the central serine residue is located near the N or C terminus of the protein sequence, the complement symbol &#x201c;_&#x201d; was added to the input sequences at the affected terminus to ensure the length was maintained. All these data are available at <ext-link ext-link-type="uri" xlink:href="http://edeepsadpr.bioinfogo.org/">http://edeepsadpr.bioinfogo.org/</ext-link>.</p>
</sec>
<sec id="s2-2">
<title>2.2 Feature encoding schemes</title>
<p>We selected five encoding features representing the input peptides for the model construction. They included the One-Hot encoding (OH) (<xref ref-type="bibr" rid="B23">Wang D. et al., 2020</xref>), the Enhanced Amino Acid Composition Encoding (EAAC) (<xref ref-type="bibr" rid="B5">Chen et al., 2018</xref>), the Enhanced Grouped Amino Acids Content encoding (EGAAC) (<xref ref-type="bibr" rid="B5">Chen et al., 2018</xref>), the ZSCALE Encoding (ZSCALE) and the Word Embedding (WE).</p>
<sec id="s2-2-1">
<title>2.2.1 One-hot (OH) encoding</title>
<p>In the One-hot encoding, the 20 amino acids and complement symbol &#x201c;_&#x201d; are encoded into a 21-dimensional binary vector. In the vector corresponding to an amino acid, the element related to the amino acid is marked as 1 and others are marked as 0. For example, &#x201c;A&#x201d; is represented by &#x201c;100000000000000000000&#x201d; and &#x201c;V&#x201d; is represented by &#x201c;010000000000000000000.&#x201d;</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 ZSCALE encoding</title>
<p>In ZSCALE encoding, every amino acid type is characterized by five physicochemical descriptor variables (<xref ref-type="bibr" rid="B4">Chen et al., 2012</xref>; <xref ref-type="bibr" rid="B31">Zhang et al., 2020</xref>). Therefore, each input sequence is represented as a vector of 205 (&#x3d;41 &#xd7; 5) dimensions. The filling character &#x201c;_&#x201d; is encoded as a 5-dimensional zero vector.</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Word embedding (WE) encoding</title>
<p>Word embedding (<xref ref-type="bibr" rid="B6">Ge and Moh, 2018</xref>) relies on the numerical (NUM) encoding approach (<xref ref-type="bibr" rid="B15">Lyu et al., 2020</xref>), which maps each type of amino acid residue to an integer. After the NUM encoding, each integer is mapped to a predefined five-dimension word vector. Therefore, each sequence is encoded as a vector of 205 (&#x3d; 41 &#xd7; 5) items.</p>
</sec>
<sec id="s2-2-4">
<title>2.2.4 Enhanced amino acid composition (EAAC) encoding</title>
<p>In EAAC encoding, the frequency of each amino acid from the N-terminal to the C-terminal within a fixed sliding window size (the default length being 5) is calculated (<xref ref-type="bibr" rid="B15">Lyu et al., 2020</xref>). Therefore, each peptide sequence is encoded as a vector of 740 &#x3d; ((41&#x2013;5 &#x2b; 1) &#xd7; 20) items.</p>
</sec>
<sec id="s2-2-5">
<title>2.2.5 Enhanced grouped amino acids content (EGAAC) encoding</title>
<p>The EGAAC encoding is developed based on grouped amino acid content (GAAC) characteristics (<xref ref-type="bibr" rid="B25">Wei et al., 2021</xref>). In the GAAC encoding, the 20 amino acid types are divided into five groups according to their physical and chemical properties (G1: GAVLMI, G2: FYW, G3: KRH, G4: DE, and G5: STCPNQ). In the EGAAC encoding, the GAAC value is calculated from N-terminal to C-terminal within a fixed sliding window (the default length being 5).</p>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 The architecture of deep-learning classifiers</title>
<p>We constructed five classifiers based on Convolutional Neural Network (CNN). They included the model combined with the One-Hot Encoding (CNN<sub>OH</sub>), the model with the Word Embedding Encoding (CNN<sub>WE</sub>), the model with the ZSCALE Encoding (CNN<sub>ZSCALE</sub>), the model with the EAAC encoding (CNN<sub>EAAC</sub>) and the model with the EGAAC encoding (CNN<sub>EGAAC</sub>). We took the CNN Model with the One-Hot encoding (CNN<sub>OH</sub>) as an example to demonstrate the architecture (<xref ref-type="fig" rid="F3">Figure 3</xref>).<list list-type="simple">
<list-item>
<p>(1) Input layer. Each sequence is converted into a feature vector with One-Hot encoding.</p>
</list-item>
<list-item>
<p>(2) The convolution layer. It contains two convolution sublayers followed by two sequentially connected blocks. Each block includes a convolution sublayer and a max pooling sublayer. There are 128 convolution kernels with the sizes of 1 and 3 for the first and second convolution sublayers, respectively. A dropout layer with a rate of 0.7 follows each convolution kernel to prevent potential overfitting. In these two blocks, there were 128 convolution kernels with a size of 9 and 10 for these two convolution sublayers of two blocks, respectively; the parameter pool_size of the max-pooling sublayer was set as 2; the dropout rate was set to 0.5. The rectified linear unit (ReLU) is considered the activation function.</p>
</list-item>
<list-item>
<p>(3) Fully connected layer. It contains a dense sublayer with 128 neurons without flattening and a global average pooling sublayer to calculate and output an average value.</p>
</list-item>
<list-item>
<p>(4) Output layer: This layer contains a single neuron, activated by a sigmoid function, to output the probability score (within the range from 0 to 1), indicating the likelihood of the crosstalk. If the probability score of an input sequence is greater than a specified threshold, the central serine in the sequence is predicted as a crosstalk site.</p>
</list-item>
</list>
</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The architecture of a one-dimensional convolutional neural network with the One-Hot encoding approach (i.e., CNN<sub>OH</sub>).</p>
</caption>
<graphic xlink:href="fcell-11-1149535-g003.tif"/>
</fig>
</sec>
<sec id="s2-4">
<title>2.4 Performance evaluation</title>
<p>Several statistical measures were used to evaluate prediction performance, including sensitivity (SN), specificity (SP), overall accuracy (ACC), Matthews correlation coefficient (MCC) and the area under the receiver operating characteristic (ROC) curve (AUC). The definitions of SN, SP, ACC, and MCC are given as follows:<disp-formula id="equ1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msqrt>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>In the above formulas, TP, TN, FP, and FN are the numbers of true positives, true negatives, false positives, and false negatives, respectively.</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s3">
<title>3 Results and discussion</title>
<sec id="s3-1">
<title>3.1 Construction and functional investigation of the pSADPr datasets</title>
<p>We created three datasets for constructing classifiers to predict pSADPr sites (<xref ref-type="fig" rid="F2">Figure 2</xref>). The first dataset was the pSADPr-SADPr dataset, containing pSADPr and SADPr peptides. The related model was used to recognize pSADPr sites from known SADPr sites (<xref ref-type="fig" rid="F2">Figure 2B</xref>). The second was the pSADPr-pS dataset, including pSADPr and pS peptides (<xref ref-type="fig" rid="F2">Figure 2C</xref>). The third was the pSADPr-UM dataset, containing pSADPr and UM peptides (<xref ref-type="fig" rid="F2">Figure 2D</xref>). Because the vast majority of serine residues are unmodified in the human proteome, the model based on the third dataset was expected to recognize pSADPr sites from the human proteome (<xref ref-type="fig" rid="F2">Figure 2D</xref>). Each of the three datasets contained two parts: cross-validation and independent test datasets (<xref ref-type="fig" rid="F2">Figures 2B&#x2013;D</xref>).</p>
<p>We explored the characteristics of the pSADPr crosstalks by comparing pSADPr-containing and other peptides in the three datasets through the Two-Sample-Logo program (<xref ref-type="bibr" rid="B21">Vacic et al., 2006</xref>). For the pSADPr-SADPr dataset, the amino acid R was significantly enriched at positions &#x2212;2 and &#x2212;3 (<italic>i.e.</italic>, P-2 and P-3), whereas K was depleted at P-1 (<xref ref-type="fig" rid="F4">Figure 4A</xref>). For the remaining two datasets, the pSADPr crosstalks showed similar characteristics (<xref ref-type="fig" rid="F4">Figures 4B, C</xref>). Specifically, K was enriched entirely except P&#x2b;1 and G was enriched at P1 and P2; D and E were depleted at P-3 to P&#x2b;5 and L was depleted entirely. The maximum enriched/depleted value (29.3%) for the pSADPr-pS dataset was similar to that (32.0%) for the pSADPr-UM dataset, and both were more than twice as large as that (13.2%) for the pSADPr-SADPr dataset (<xref ref-type="fig" rid="F4">Figure 4</xref>). This indicates that the differences between pSADPr and SADPr sites are smaller than those between pSADPr and pS/UM sites. In other words, it is easier to distinguish pSADPr sites from pS/UM sites than from SADPr sites.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Sequence pattern surrounding the pSADPr sites. Enriched and depleted residues flanking the central pSADPr sites were shown for the pSADPr-SADPr dataset <bold>(A)</bold>, the pSADPr-pS dataset <bold>(B)</bold>, and the pSADPr-UM dataset <bold>(C)</bold> (<italic>p</italic> &#x3c; 0.05, <italic>t</italic>-test with Bonferroni correction). The patterns were generated using the Two-Sample-Logo program (<xref ref-type="bibr" rid="B21">Vacic et al., 2006</xref>).</p>
</caption>
<graphic xlink:href="fcell-11-1149535-g004.tif"/>
</fig>
<p>The human serine kinase family contains a few subfamilies, each with its characteristics. We explored which subfamilies preferred phosphorylating the pSADPr sites. To perform this analysis, we used the human pS sites as the background and the pSADPr sites as the test dataset. We employed the GPS program (<xref ref-type="bibr" rid="B24">Wang C. et al., 2020</xref>) to predict pS sites for each subfamily from both datasets (<xref ref-type="fig" rid="F5">Figure 5</xref>). We found that four subfamilies (i.e., AGC, CAMK, STE and TKL) tended to phosphorylate pSADPr sites (<italic>p</italic> &#x3c; 5.0 &#xd7; 10<sup>&#x2212;26</sup>, hyper-geometric test). In comparison, two subfamilies (i.e., CK1 and CMGC) prefer not to phosphorylate pSADPr sites (<italic>p</italic> &#x3c; 5.1 &#xd7; 10<sup>&#x2212;29</sup>, hyper-geometric test). For example, 68% of pSADPr sites could be phosphorylated by the AGC subfamily, whereas only 44% of pS sites are modified by this subfamily (<italic>p</italic> &#x3d; 2.3 &#xd7; 10<sup>&#x2212;174</sup>, hyper-geometric test). This observation suggests that the pSADPr sites may be related to specific subfamilies of serine kinases.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Enrichment analysis of human pSADPr sites as the substrates of serine kinase subfamilies predicted by GPS (<xref ref-type="bibr" rid="B24">Wang C. et al., 2020</xref>). Human pS sites were used as the background. <italic>p</italic>-value was calculated using the hyper-geometric test.</p>
</caption>
<graphic xlink:href="fcell-11-1149535-g005.tif"/>
</fig>
<p>In the three datasets, the pSADPr-pS and pSADPr-UM datasets were imbalanced because the numbers of pS and UM peptides were far more than the number of pSADPr peptides (<xref ref-type="fig" rid="F2">Figures 2C, D</xref>). To explore the effect of the imbalanced dataset on the predictor&#x2019;s performance, we built the related balanced cross-validation dataset where the number (2,162) of randomly selected pS or UM peptides was the same as that of pSADPr peptides. We constructed the CNN<sub>OH</sub> models related to the imbalanced and balanced datasets and evaluated their prediction performances in terms of the independent test. The CNN<sub>OH</sub> model based on the imbalanced dataset had better performance than the counterpart constructed using the balanced dataset (<italic>p</italic> &#x3d; 0.002 for both pSADPr-pS and pSADPr-UM datasets, Wilcoxon rank sum test; <xref ref-type="fig" rid="F6">Figure 6</xref>). Therefore, we chose the imbalanced dataset for model construction.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Performance comparisons between the CNN<sub>OH</sub> models based on balanced and imbalanced datasets in the independent test dataset. The models were developed for the pSADPr-pS dataset <bold>(A)</bold> and the pSADPr-UM dataset <bold>(B)</bold>.</p>
</caption>
<graphic xlink:href="fcell-11-1149535-g006.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>3.2 Construction and evaluation of CNN-based classifiers</title>
<p>We constructed five CNN classifiers (i.e., CNN<sub>OH</sub>, CNN<sub>WE</sub>, CNN<sub>EAAC</sub>, CNN<sub>EGAAC</sub> and CNN<sub>ZSCALE</sub>) to recognize pSADPr sites from the three datasets and compared their prediction performances. Here, we used the pSADPr-SADPr dataset to demonstrate the process. Three out of the five classifiers (i.e., CNN<sub>OH</sub>, CNN<sub>WE</sub> and CNN<sub>ZSCALE</sub>) showed similar performances and superiority over the rest two (i.e., CNN<sub>EAAC</sub> and CNN<sub>EGAAC</sub>) in ten-fold cross-validation and independent test (<xref ref-type="table" rid="T1">Table 1</xref>; <xref ref-type="fig" rid="F7">Figure 7</xref> and <xref ref-type="sec" rid="s10">Supplementary Figure S1</xref>). For instance, the CNN<sub>OH</sub> model had an AUC value of 0.712, larger than that (0.659) of the CNN<sub>EAAC</sub> model in the cross-validation. We repeated this analysis for the pSADPr-pS and pSADPr-UM datasets and made similar observations that the three classifiers had the best performances (<xref ref-type="sec" rid="s10">Supplementary Tables S1, S2</xref>; <xref ref-type="sec" rid="s10">Supplementary Figures S1&#x2013;S5</xref>). Furthermore, we compared the classifiers&#x2019; performances for the three datasets. We found that the AUC values (0.921 and 0.953) of the CNN<sub>OH</sub> classifiers for pSADPr-pS and pSADPr-UM datasets were significantly larger than that (0.712) for the pSADPr-SADPr dataset. These results were consistent with our observation that the differences between pSADPr and SADPr sites are smaller than those between pSADPr and pS/UM sites (<xref ref-type="fig" rid="F4">Figure 4</xref>). Since the One-Hot feature is the simplest compared to the WE and ZSCALE features, we chose the CNN classifier with the One-Hot scheme as the representative of the three classifiers.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Prediction performances of CNN-based classifiers for the pSADPr-SADPr dataset<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Classifier</th>
<th align="left">SN</th>
<th align="left">SP</th>
<th align="left">ACC</th>
<th align="left">MCC</th>
<th align="left">AUC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="6" align="left">Ten-fold Cross-validation</td>
</tr>
<tr>
<td align="left">CNN<sub>OH</sub>
</td>
<td align="left">0.599 &#xb1; 0.031</td>
<td align="left">0.694 &#xb1; 0.001</td>
<td align="left">0.649 &#xb1; 0.016</td>
<td align="left">0.294 &#xb1; 0.031</td>
<td align="left">0.712 &#xb1; 0.020</td>
</tr>
<tr>
<td align="left">CNN<sub>ZSCALE</sub>
</td>
<td align="left">0.598 &#xb1; 0.059</td>
<td align="left">0.694 &#xb1; 0.001</td>
<td align="left">0.649 &#xb1; 0.025</td>
<td align="left">0.293 &#xb1; 0.058</td>
<td align="left">0.705 &#xb1; 0.030</td>
</tr>
<tr>
<td align="left">CNN<sub>WE</sub>
</td>
<td align="left">0.591 &#xb1; 0.089</td>
<td align="left">0.694 &#xb1; 0.001</td>
<td align="left">0.644 &#xb1; 0.044</td>
<td align="left">0.285 &#xb1; 0.088</td>
<td align="left">0.696 &#xb1; 0.043</td>
</tr>
<tr>
<td align="left">CNN<sub>EAAC</sub>
</td>
<td align="left">0.523 &#xb1; 0.040</td>
<td align="left">0.694 &#xb1; 0.001</td>
<td align="left">0.611 &#xb1; 0.021</td>
<td align="left">0.219 &#xb1; 0.040</td>
<td align="left">0.659 &#xb1; 0.016</td>
</tr>
<tr>
<td align="left">CNN<sub>EGAAC</sub>
</td>
<td align="left">0.488 &#xb1; 0.034</td>
<td align="left">0.694 &#xb1; 0.001</td>
<td align="left">0.595 &#xb1; 0.018</td>
<td align="left">0.185 &#xb1; 0.034</td>
<td align="left">0.621 &#xb1; 0.029</td>
</tr>
<tr>
<td colspan="6" align="left">Independent test</td>
</tr>
<tr>
<td align="left">CNN<sub>OH</sub>
</td>
<td align="left">0.608 &#xb1; 0.034</td>
<td align="left">0.694 &#xb1; 0.000</td>
<td align="left">0.653 &#xb1; 0.016</td>
<td align="left">0.303 &#xb1; 0.033</td>
<td align="left">0.700 &#xb1; 0.010</td>
</tr>
<tr>
<td align="left">CNN<sub>ZSCALE</sub>
</td>
<td align="left">0.583 &#xb1; 0.037</td>
<td align="left">0.694 &#xb1; 0.000</td>
<td align="left">0.641 &#xb1; 0.018</td>
<td align="left">0.278 &#xb1; 0.036</td>
<td align="left">0.692 &#xb1; 0.017</td>
</tr>
<tr>
<td align="left">CNN<sub>WE</sub>
</td>
<td align="left">0.557 &#xb1; 0.058</td>
<td align="left">0.694 &#xb1; 0.000</td>
<td align="left">0.628 &#xb1; 0.028</td>
<td align="left">0.253 &#xb1; 0.057</td>
<td align="left">0.682 &#xb1; 0.022</td>
</tr>
<tr>
<td align="left">CNN<sub>EAAC</sub>
</td>
<td align="left">0.500 &#xb1; 0.016</td>
<td align="left">0.694 &#xb1; 0.000</td>
<td align="left">0.601 &#xb1; 0.008</td>
<td align="left">0.197 &#xb1; 0.016</td>
<td align="left">0.637 &#xb1; 0.008</td>
</tr>
<tr>
<td align="left">CNN<sub>EGAAC</sub>
</td>
<td align="left">0.488 &#xb1; 0.044</td>
<td align="left">0.694 &#xb1; 0.000</td>
<td align="left">0.595 &#xb1; 0.021</td>
<td align="left">0.185 &#xb1; 0.043</td>
<td align="left">0.621 &#xb1; 0.016</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn1">
<label>
<sup>a</sup>
</label>
<p>Ten models were constructed and evaluated in ten-fold cross-validation. Their average performance and standard deviation were separately calculated for the cross-validation and the independent test datasets.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Performance comparison of CNN-based classifiers built for the pSADPr-SADPr dataset in ten-fold cross-validation <bold>(A)</bold> and independent test <bold>(B)</bold>.</p>
</caption>
<graphic xlink:href="fcell-11-1149535-g007.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>3.3 Construction and evaluation of stacking ensemble learning classifiers</title>
<p>A stacking-based ensemble learning architecture is one of the ensemble techniques in which multiple learning models are integrated to produce one optimal predictive model, which performs better than the base models taken alone. In the stacking ensemble architecture, a meta-learner is trained to output a prediction based on the different base learners&#x2019; predictions. The stacking ensemble architecture has been used to improve the prediction performance in various bioinformatics applications (e.g., lysine acetylation site prediction) (<xref ref-type="bibr" rid="B16">Mishra et al., 2019</xref>; <xref ref-type="bibr" rid="B30">Zhang et al., 2021</xref>; <xref ref-type="bibr" rid="B1">Basith et al., 2022</xref>). Here, we introduced the two-stage stacking ensemble approach to improve the performance of the pSADPr site prediction (<xref ref-type="fig" rid="F8">Figure 8</xref>). In the first stage, different CNN algorithms (e.g., CNN<sub>OH</sub>, CNN<sub>WE</sub> and CNN<sub>ZSCALE</sub>) were selected to construct base classifiers. Specifically, ten base classifiers for each CNN algorithm were built and validated using the ten-fold cross-validation dataset. The base classifiers were then used for prediction in the independent test dataset, and their prediction results were averaged. Therefore, each CNN algorithm corresponds to the validation result and the averaged result for the independent test dataset. In the second stage, the validation and the averaged results were merged as a meta cross-validation dataset and a meta-independent test dataset, respectively (<xref ref-type="fig" rid="F8">Figure 8</xref>). The former dataset was used to train and validate a meta-classifier, whereas the latter was employed to evaluate the meta-classifier&#x2019;s performance. Here, we constructed the meta-classifier using the random forest (RF) algorithm, which was optimized using the GridSearchCV package. 
The optimized parameters included max_depth as 8, max_features as &#x201c;sqrt,&#x201d; min_samples_leaf as 20, min_samples_split as 300 and n_estimators as 100.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>The architecture of the two-stage stacking ensemble classifier.</p>
</caption>
<graphic xlink:href="fcell-11-1149535-g008.tif"/>
</fig>
<p>According to the above analysis, the three classifiers (i.e., CNN<sub>OH</sub>, CNN<sub>WE</sub> and CNN<sub>ZSCALE</sub>) had better performances than the other two classifiers (i.e., CNN<sub>EAAC</sub> and CNN<sub>EGAAC</sub>) for all three datasets. Based on this observation, we fused them as base classifiers to build the two-stage stacking ensemble approach with a good performance. We started with the fusion of the three best classifiers until we fused all the classifiers. The related stacking models included Stacking<sub>O&#x2b;Z&#x2b;W</sub>, Stacking<sub>O&#x2b;Z&#x2b;W&#x2b;E</sub> and Stacking<sub>O&#x2b;Z&#x2b;W&#x2b;E&#x2b;EG</sub>, where O stands for OH, Z for ZSCALE, W for WE, E for EAAC, and EG for EGAAC. For the pSADPr-SADPr dataset, the three stacking models showed similar performances in meta ten-fold cross-validation and independent test (<xref ref-type="table" rid="T2">Table 2</xref>; <xref ref-type="fig" rid="F9">Figure 9</xref> and <xref ref-type="sec" rid="s10">Supplementary Figure S6</xref>). For instance, their average AUC/MCC values were around 0.719/0.313 in cross-validation (<xref ref-type="table" rid="T2">Table 2</xref>). The stacking models for the two other datasets (pSADPr-pS and pSADPr-UM) also performed similarly (<xref ref-type="sec" rid="s10">Supplementary Figures S7&#x2013;S10</xref>).</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Prediction performances of stacking ensemble classifiers for the pSADPr-SADPr dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Classifier</th>
<th align="left">SN</th>
<th align="left">SP</th>
<th align="left">ACC</th>
<th align="left">MCC</th>
<th align="left">AUC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="6" align="left">Cross-validation</td>
</tr>
<tr>
<td align="left">CNN<sub>O&#x2b;Z&#x2b;W</sub>
</td>
<td align="left">0.618 &#xb1; 0.029</td>
<td align="left">0.694 &#xb1; 0.001</td>
<td align="left">0.657 &#xb1; 0.014</td>
<td align="left">0.313 &#xb1; 0.029</td>
<td align="left">0.719 &#xb1; 0.021</td>
</tr>
<tr>
<td align="left">CNN<sub>O&#x2b;Z&#x2b;W&#x2b;E</sub>
</td>
<td align="left">0.621 &#xb1; 0.030</td>
<td align="left">0.694 &#xb1; 0.001</td>
<td align="left">0.658 &#xb1; 0.015</td>
<td align="left">0.315 &#xb1; 0.030</td>
<td align="left">0.719 &#xb1; 0.019</td>
</tr>
<tr>
<td align="left">CNN<sub>O&#x2b;Z&#x2b;W&#x2b;E&#x2b;EG</sub>
</td>
<td align="left">0.617 &#xb1; 0.039</td>
<td align="left">0.694 &#xb1; 0.001</td>
<td align="left">0.657 &#xb1; 0.019</td>
<td align="left">0.311 &#xb1; 0.039</td>
<td align="left">0.718 &#xb1; 0.022</td>
</tr>
<tr>
<td colspan="6" align="left">Independent test</td>
</tr>
<tr>
<td align="left">CNN<sub>O&#x2b;Z&#x2b;W</sub>
</td>
<td align="left">0.578 &#xb1; 0.009</td>
<td align="left">0.694 &#xb1; 0.000</td>
<td align="left">0.638 &#xb1; 0.004</td>
<td align="left">0.274 &#xb1; 0.009</td>
<td align="left">0.704 &#xb1; 0.003</td>
</tr>
<tr>
<td align="left">CNN<sub>O&#x2b;Z&#x2b;W&#x2b;E</sub>
</td>
<td align="left">0.584 &#xb1; 0.012</td>
<td align="left">0.694 &#xb1; 0.000</td>
<td align="left">0.641 &#xb1; 0.006</td>
<td align="left">0.279 &#xb1; 0.012</td>
<td align="left">0.703 &#xb1; 0.002</td>
</tr>
<tr>
<td align="left">CNN<sub>O&#x2b;Z&#x2b;W&#x2b;E&#x2b;EG</sub>
</td>
<td align="left">0.597 &#xb1; 0.022</td>
<td align="left">0.694 &#xb1; 0.000</td>
<td align="left">0.647 &#xb1; 0.011</td>
<td align="left">0.292 &#xb1; 0.021</td>
<td align="left">0.703 &#xb1; 0.002</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Performance comparison between CNN-based classifiers and the stacking-based ensemble classifiers for the pSADPr-SADPr dataset in the ten-fold cross-validation <bold>(A)</bold> and independent test <bold>(B)</bold>. <italic>p</italic> values were calculated using the two-sided Mann&#x2013;Whitney U test.</p>
</caption>
<graphic xlink:href="fcell-11-1149535-g009.tif"/>
</fig>
</sec>
<sec id="s3-4">
<title>3.4 Comparison of CNN-based models and stacking ensemble models</title>
<p>We compared the performances of the CNN-based models and the stacking ensemble models for each of the three datasets. We found no statistically significant difference between the CNN<sub>OH</sub> model and these stacking ensemble models for each dataset (<xref ref-type="fig" rid="F9">Figure 9</xref> and <xref ref-type="sec" rid="s10">Supplementary Figures S9, S10</xref>). The observation that the meta-classifiers perform similarly to the base classifier is consistent with the previous report for predicting bacterial Type IV secreted effectors, in which the meta-classifier and base classifier performed similarly (<xref ref-type="bibr" rid="B26">Xiong et al., 2018</xref>). It suggests that the base classifiers may have sufficient predictive ability, and the stacking ensemble architecture does not always improve prediction accuracy.</p>
</sec>
<sec id="s3-5">
<title>3.5 Construction of the online EdeepSADPr predictor</title>
<p>We developed an online prediction tool for predicting human pSADPr sites extensively from different conditions, dubbed EdeepSADPr. This tool consists of three models, each corresponding to the prediction from the SADPr dataset, the serine phosphorylation dataset or the human proteome. As the CNN<sub>OH</sub> classifier performed no worse than the other methods, we selected this classifier to construct EdeepSADPr. The tool is used as follows. After a model is selected, the input sequence is uploaded in FASTA format. The prediction results are output in tabular form with five columns: sequence header, position, sequence, prediction score, and prediction category. The predicted results can also be downloaded as a data file. EdeepSADPr is accessible via <ext-link ext-link-type="uri" xlink:href="http://edeepsadpr.bioinfogo.org/">http://edeepsadpr.bioinfogo.org/</ext-link>.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>The main goal of this study is the development of a model to predict pSADPr sites based on protein sequence information and the investigation of the characteristics of pSADPr. We developed different deep-learning classifiers and used them as base classifiers to construct a few stacking-based ensemble models. We found that the base classifiers and the ensemble models had similar performances. The performance of the ensemble model may not have improved because there is little difference between the features used for model construction or because the base models do not comprehensively cover the characteristics of pSADPr. In the near future, we may integrate sequential information, structural information and evolutionary information to improve model performance (<xref ref-type="bibr" rid="B27">Xu et al., 2021</xref>). Additionally, the performance may be boosted by increasing the data amount and optimizing the model architecture (<xref ref-type="bibr" rid="B32">Zhu et al., 2022</xref>). Moreover, we identified characteristics of pSADPr sites, which may boost the understanding of this crosstalk. In summary, we developed the first classifier to predict human pSADPr sites and expect that accurate prediction will facilitate the discovery of new pSADPr sites. This architecture is applicable to the model construction for predicting other types of <italic>in situ</italic> crosstalks.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>LL conceived this project. HJ and YS constructed the algorithms; HJ analyzed the data. LZ and HJ constructed the website. HJ, SS, YS, NH, and LL wrote the manuscript. All authors have read and agreed to the published version of the manuscript.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work was partially supported by the National Natural Science Foundation of China (Grant 32071430 and Grant 32271504) and Innovation Capability Improvement Project of Science and Technology for Small and Medium-sized Enterprises in Shandong Province (2021TSGC1295).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>HJ was an intern of Sino Genomics Technology Co., Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fcell.2023.1149535/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fcell.2023.1149535/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.docx" id="SM1" mimetype="application/docx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Basith</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Manavalan</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Stallion: A stacking-based ensemble learning framework for prokaryotic lysine acetylation site prediction</article-title>. <source>Brief. Bioinform.</source> <volume>23</volume> (<issue>1</issue>), <fpage>bbab412</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbab412</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brustel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Muramoto</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Fumimoto</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ellins</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pears</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Lakin</surname>
<given-names>N. D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Linking DNA repair and cell cycle progression through serine ADP-ribosylation of histones</article-title>. <source>Nat. Commun.</source> <volume>13</volume> (<issue>1</issue>), <fpage>185</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-021-27867-4</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buch-Larsen</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Hendriks</surname>
<given-names>I. A.</given-names>
</name>
<name>
<surname>Lodge</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Rykaer</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Furtwangler</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Shishkova</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Mapping physiological ADP-ribosylation using activated ion electron transfer dissociation</article-title>. <source>Cell Rep.</source> <volume>32</volume> (<issue>12</issue>), <fpage>108176</fpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2020.108176</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Y. Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Gong</surname>
<given-names>Y. A.</given-names>
</name>
<name>
<surname>Ying</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>SUMOhydro: A novel method for the prediction of sumoylation sites based on hydrophobic properties</article-title>. <source>PLoS One</source> <volume>7</volume> (<issue>6</issue>), <fpage>e39195</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0039195</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Leier</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Marquez-Lago</surname>
<given-names>T. T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>iFeature: a Python package and web server for features extraction and selection from protein and peptide sequences</article-title>. <source>Bioinformatics</source> <volume>34</volume> (<issue>14</issue>), <fpage>2499</fpage>&#x2013;<lpage>2502</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty140</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ge</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Moh</surname>
<given-names>T. S.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Improving text classification with word embedding</article-title>,&#x201d; in <conf-name>IEEE International Conference on Big Data</conf-name>, <conf-loc>Boston, MA, USA</conf-loc>, <conf-date>11-14 December 2017</conf-date>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hendriks</surname>
<given-names>I. A.</given-names>
</name>
<name>
<surname>Larsen</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Nielsen</surname>
<given-names>M. L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>An advanced strategy for comprehensive profiling of ADP-ribosylation sites using mass spectrometry-based proteomics</article-title>. <source>Mol. Cell. Proteomics</source> <volume>18</volume> (<issue>5</issue>), <fpage>1010</fpage>&#x2013;<lpage>1026</lpage>. <pub-id pub-id-type="doi">10.1074/mcp.TIR119.001315</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hornbeck</surname>
<given-names>P. V.</given-names>
</name>
<name>
<surname>Kornhauser</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Tkachev</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Skrzypek</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Murray</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>PhosphoSitePlus: A comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse</article-title>. <source>Nucleic Acids Res.</source> <volume>40</volume>, <fpage>D261</fpage>&#x2013;<lpage>D270</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkr1122</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Niu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>CD-HIT suite: A web server for clustering and comparing biological sequences</article-title>. <source>Bioinformatics</source> <volume>26</volume> (<issue>5</issue>), <fpage>680</fpage>&#x2013;<lpage>682</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq003</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hunter</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>The age of crosstalk: Phosphorylation, ubiquitination, and beyond</article-title>. <source>Mol. Cell</source> <volume>28</volume> (<issue>5</issue>), <fpage>730</fpage>&#x2013;<lpage>738</lpage>. <pub-id pub-id-type="doi">10.1016/j.molcel.2007.11.019</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Larsen</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Hendriks</surname>
<given-names>I. A.</given-names>
</name>
<name>
<surname>Lyon</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jensen</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Nielsen</surname>
<given-names>M. L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Systems-wide analysis of serine ADP-ribosylation reveals widespread occurrence and site-specific overlap with phosphorylation</article-title>. <source>Cell Rep.</source> <volume>24</volume> (<issue>9</issue>), <fpage>2493</fpage>&#x2013;<lpage>2505</lpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2018.07.083</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Godzik</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Cd-Hit: A fast program for clustering and comparing large sets of protein or nucleotide sequences</article-title>. <source>Bioinformatics</source> <volume>22</volume> (<issue>13</issue>), <fpage>1658</fpage>&#x2013;<lpage>1659</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btl158</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>X. M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>DeepPhos: Prediction of protein phosphorylation sites with deep learning</article-title>. <source>Bioinformatics</source> <volume>35</volume> (<issue>16</issue>), <fpage>2766</fpage>&#x2013;<lpage>2773</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty1051</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luscher</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Butepage</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Eckei</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Krieg</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Verheugd</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Shilton</surname>
<given-names>B. H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>ADP-ribosylation, a multifaceted posttranslational modification involved in the control of cell physiology in health and disease</article-title>. <source>Chem. Rev.</source> <volume>118</volume> (<issue>3</issue>), <fpage>1092</fpage>&#x2013;<lpage>1136</lpage>. <pub-id pub-id-type="doi">10.1021/acs.chemrev.7b00122</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lyu</surname>
<given-names>X. R.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>C. Y.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>N. N.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>DeepCSO: A deep-learning network approach to predicting cysteine S-sulphenylation sites</article-title>. <source>Front. Cell Dev. Biol.</source> <volume>8</volume>, <fpage>594587</fpage>. <pub-id pub-id-type="doi">10.3389/fcell.2020.594587</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mishra</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pokhrel</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Hoque</surname>
<given-names>M. T.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>StackDPPred: A stacking based prediction of DNA-binding protein from sequence</article-title>. <source>Bioinformatics</source> <volume>35</volume> (<issue>3</issue>), <fpage>433</fpage>&#x2013;<lpage>441</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty653</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nowak</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Rosenthal</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Karlberg</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Butepage</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Thorsell</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Dreier</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Engineering Af1521 improves ADP-ribose binding and identification of ADP-ribosylated proteins</article-title>. <source>Nat. Commun.</source> <volume>11</volume> (<issue>1</issue>), <fpage>5199</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-18981-w</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Scholten</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Heck</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>van Breukelen</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Identification of enriched PTM crosstalk motifs from large-scale experimental data sets</article-title>. <source>J. Proteome Res.</source> <volume>13</volume> (<issue>1</issue>), <fpage>249</fpage>&#x2013;<lpage>259</lpage>. <pub-id pub-id-type="doi">10.1021/pr4005579</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sha</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>DeepSADPr: A hybrid-learning architecture for serine ADP-ribosylation site prediction</article-title>. <source>Methods</source> <volume>203</volume>, <fpage>575</fpage>&#x2013;<lpage>583</lpage>. <pub-id pub-id-type="doi">10.1016/j.ymeth.2021.09.008</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Swaney</surname>
<given-names>D. L.</given-names>
</name>
<name>
<surname>Beltrao</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Starita</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rush</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fields</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Global analysis of phosphorylation and ubiquitylation cross-talk in protein degradation</article-title>. <source>Nat. Methods</source> <volume>10</volume> (<issue>7</issue>), <fpage>676</fpage>&#x2013;<lpage>682</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.2519</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vacic</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Iakoucheva</surname>
<given-names>L. M.</given-names>
</name>
<name>
<surname>Radivojac</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Two sample logo: A graphical representation of the differences between two sets of sequence alignments</article-title>. <source>Bioinformatics</source> <volume>22</volume> (<issue>12</issue>), <fpage>1536</fpage>&#x2013;<lpage>1537</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btl151</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Venne</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Kollipara</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zahedi</surname>
<given-names>R. P.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>The next level of complexity: Crosstalk of posttranslational modifications</article-title>. <source>Proteomics</source> <volume>14</volume> (<issue>4-5</issue>), <fpage>513</fpage>&#x2013;<lpage>524</lpage>. <pub-id pub-id-type="doi">10.1002/pmic.201300344</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Yuchi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020a</year>). <article-title>MusiteDeep: A deep-learning based webserver for protein post-translational modification site prediction and visualization</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume> (<issue>W1</issue>), <fpage>W140</fpage>&#x2013;<lpage>W146</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa275</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2020b</year>). <article-title>GPS 5.0: An update on the prediction of kinase-specific phosphorylation sites in proteins</article-title>. <source>Genomics Proteomics Bioinforma.</source> <volume>18</volume> (<issue>1</issue>), <fpage>72</fpage>&#x2013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1016/j.gpb.2020.01.001</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname>
<given-names>X. L.</given-names>
</name>
<name>
<surname>Sha</surname>
<given-names>Y. T.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Y. M.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>N. N.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>DeepKcrot: A deep-learning architecture for general and species-specific lysine crotonylation site prediction</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>49504</fpage>&#x2013;<lpage>49513</lpage>. <pub-id pub-id-type="doi">10.1109/access.2021.3068413</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>D. Q.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>PredT4SE-Stack: Prediction of bacterial type IV secreted effectors from protein sequences using a stacked ensemble method</article-title>. <source>Front. Microbiol.</source> <volume>9</volume>, <fpage>2571</fpage>. <pub-id pub-id-type="doi">10.3389/fmicb.2018.02571</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>H. D.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>R. P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y. G.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>mUSP: a high-accuracy map of the <italic>in situ</italic> crosstalk of ubiquitylation and SUMOylation proteome predicted via the feature enhancement approach</article-title>. <source>Brief. Bioinform.</source> <volume>22</volume> (<issue>3</issue>), <fpage>bbaa050</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbaa050</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>H. D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L. N.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>P. P.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Site-specific systematic analysis of lysine modification crosstalk</article-title>. <source>Proteomics</source> <volume>18</volume> (<issue>9</issue>), <fpage>e1700292</fpage>. <pub-id pub-id-type="doi">10.1002/pmic.201700292</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>X. J.</given-names>
</name>
<name>
<surname>Gregoire</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>A recurrent phospho-sumoyl switch in transcriptional repression and beyond</article-title>. <source>Mol. Cell</source> <volume>23</volume> (<issue>6</issue>), <fpage>779</fpage>&#x2013;<lpage>786</lpage>. <pub-id pub-id-type="doi">10.1016/j.molcel.2006.08.009</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>SBP-SITA: A sequence-based prediction tool for S-itaconation</article-title>. <comment>bioRxiv</comment>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>DeepKhib: A deep-learning framework for lysine 2-hydroxyisobutyrylation sites prediction</article-title>. <source>Front. Cell Dev. Biol.</source> <volume>8</volume>, <fpage>580217</fpage>. <pub-id pub-id-type="doi">10.3389/fcell.2020.580217</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>ResSUMO: A deep learning architecture based on residual structure for prediction of lysine SUMOylation sites</article-title>. <source>Cells</source> <volume>11</volume> (<issue>17</issue>), <fpage>2646</fpage>. <pub-id pub-id-type="doi">10.3390/cells11172646</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zolnierowicz</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bollen</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Protein phosphorylation and protein phosphatases. De Panne, Belgium, September 19-24, 1999</article-title>. <source>EMBO J.</source> <volume>19</volume> (<issue>4</issue>), <fpage>483</fpage>&#x2013;<lpage>488</lpage>. <pub-id pub-id-type="doi">10.1093/emboj/19.4.483</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>