<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Microbiol.</journal-id>
<journal-title>Frontiers in Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Microbiol.</abbrev-journal-title>
<issn pub-type="epub">1664-302X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmicb.2022.1015140</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Microbiology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Mirror proteases of Ac-Trypsin and Ac-LysargiNase precisely improve novel event identifications in <italic>Mycolicibacterium smegmatis</italic> MC<sup>2</sup> 155 by proteogenomic analysis</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Jiang</surname> <given-names>Songhao</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2015973/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Shi</surname> <given-names>Jiahui</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2016216/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Yanchang</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2017474/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Zhenpeng</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/421373/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Chang</surname> <given-names>Lei</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2017471/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Guibin</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/790154/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wu</surname> <given-names>Wenhui</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2015972/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Yu</surname> <given-names>Liyan</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1032220/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Dai</surname> <given-names>Erhei</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1198288/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Lixia</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2016542/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Lyu</surname> <given-names>Zhitang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c003"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/867260/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Xu</surname> <given-names>Ping</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1853079/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhang</surname> <given-names>Yao</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/512431/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Key Laboratory of Microbial Diversity Research and Application of Hebei, School of Life Sciences, Hebei University</institution>, <addr-line>Baoding</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Beijing Proteome Research Center, National Center for Protein Sciences Beijing, State Key Laboratory of Proteomics, Research Unit of Proteomics and Research and Development of New Drug of Chinese Academy of Medical Sciences, Institute of Lifeomics</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Guangzhou University of Chinese Medicine, Second Clinical Medicine College, Guangzhou Higher Education Mega Center</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Research Unit of Proteomics and Research and Development of New Drug, Institute of Medicinal Biotechnology, Chinese Academy of Medical Sciences and Peking Union Medical College</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>The Fifth Hospital of Shijiazhuang, School of Public Health</institution>, <addr-line>Shijiazhuang</addr-line>, <country>China</country></aff>
<aff id="aff6"><sup>6</sup><institution>Key Research Laboratory for Infectious Disease Prevention for State Administration of Traditional Chinese Medicine, Tianjin Institute of Respiratory Diseases, Haihe Hospital, Tianjin University</institution>, <addr-line>Tianjin</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Pitter F. Huesgen, Julich Research Center (HZ), Germany</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Mao Peng, Westerdijk Fungal Biodiversity Institute, Netherlands; Rui Chen, National Research Council Canada (NRC-CNRC), Canada</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Ping Xu <email>xuping&#x00040;ncpsb.org.cn</email></corresp>
<corresp id="c002">Yao Zhang <email>zhangyaowsw&#x00040;163.com</email></corresp>
<corresp id="c003">Zhitang Lyu <email>lzt325&#x00040;126.com</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Microbiological Chemistry and Geomicrobiology, a section of the journal Frontiers in Microbiology</p></fn></author-notes>
<pub-date pub-type="epub">
<day>12</day>
<month>10</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1015140</elocation-id>
<history>
<date date-type="received">
<day>09</day>
<month>08</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>12</day>
<month>09</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2022 Jiang, Shi, Li, Zhang, Chang, Wang, Wu, Yu, Dai, Zhang, Lyu, Xu and Zhang.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Jiang, Shi, Li, Zhang, Chang, Wang, Wu, Yu, Dai, Zhang, Lyu, Xu and Zhang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license> </permissions>
<abstract>
<p>Accurate identification of novel peptides remains challenging because of the lack of evaluation criteria in large-scale proteogenomic studies. Mirror proteases of trypsin and lysargiNase can generate complementary <italic>b</italic>/<italic>y</italic> ion series, providing the opportunity to efficiently assess authentic novel peptides in experiments rather than filtering potential targets by ranking at different false discovery rates (FDRs). In this study, a pair of in-house developed acetylated mirror proteases, Ac-Trypsin and Ac-LysargiNase, were used in <italic>Mycolicibacterium smegmatis</italic> MC<sup>2</sup> 155 for proteogenomic analysis. The mirror proteases accurately identified 368 novel peptides, exhibiting 75&#x02013;80% <italic>b</italic> and <italic>y</italic> ion coverages against 65&#x02013;68% <italic>y</italic> or <italic>b</italic> ion coverages of Ac-Trypsin (38.9% <italic>b</italic> and 68.3% <italic>y</italic>) or Ac-LysargiNase (65.5% <italic>b</italic> and 39.6% <italic>y</italic>) as annotated peptides from <italic>M. smegmatis</italic> MC<sup>2</sup> 155. The complementary <italic>b</italic> and <italic>y</italic> ion series largely increased the reliability of overlapped sequences derived from novel peptides. Among these novel peptides, 311 peptides were annotated in other public <italic>M. smegmatis</italic> strains, and 57 novel peptides with more continuous <italic>b</italic> and <italic>y</italic> pairs were obtained for further analysis after spectral quality assessment. This enabled mirror proteases to successfully correct six annotated proteins&#x00027; N-termini and detect 17 new coding open reading frames (ORFs). We believe that mirror proteases will be an effective strategy for novel peptide detection in both prokaryotic and eukaryotic proteogenomics.</p></abstract>
<kwd-group>
<kwd><italic>Mycolicibacterium smegmatis</italic></kwd>
<kwd>proteogenomics</kwd>
<kwd>Ac-Trypsin</kwd>
<kwd>Ac-LysargiNase</kwd>
<kwd>mirror</kwd>
</kwd-group>
<counts>
<fig-count count="5"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="49"/>
<page-count count="14"/>
<word-count count="7780"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Proteogenomics has emerged as the inter-discipline of genomics and proteomics, which was proposed by Jaffe in 2004 (Jaffe et al., <xref ref-type="bibr" rid="B17">2004</xref>). This field has been driven by advances in various sequencing and proteomic strategies (Renuse et al., <xref ref-type="bibr" rid="B34">2011</xref>; Menschaert and Fenyo, <xref ref-type="bibr" rid="B28">2017</xref>; Ang et al., <xref ref-type="bibr" rid="B2">2019</xref>). High throughput sequencing technologies have increased the speed of study and the depth of data coverage (Ruggles et al., <xref ref-type="bibr" rid="B35">2017</xref>). The integration of multi-omic datasets allows not only for the improvement in gene re-annotation (Castellana and Bafna, <xref ref-type="bibr" rid="B4">2010</xref>; de Souza et al., <xref ref-type="bibr" rid="B6">2011</xref>; Herbst et al., <xref ref-type="bibr" rid="B15">2019</xref>; Li et al., <xref ref-type="bibr" rid="B25">2020</xref>) but also for the identification of novel peptides or their variants in precision medicine (Zhang et al., <xref ref-type="bibr" rid="B45">2016a</xref>; Ferrarotto et al., <xref ref-type="bibr" rid="B8">2021</xref>; Dong et al., <xref ref-type="bibr" rid="B7">2022</xref>).</p>
<p>Novel peptides detected by proteogenomics provide unpredictable information and insights into basic and disease research, which are difficult to identify by traditional annotating strategies (Castellana and Bafna, <xref ref-type="bibr" rid="B4">2010</xref>). However, a primary challenge for newly identified peptides is filtering them out from the bloated database and evaluating their authenticity. Erroneous identification might be caused by incomplete fragmentation, noise, and &#x0201C;isometric&#x0201D; peptides (Castellana and Bafna, <xref ref-type="bibr" rid="B4">2010</xref>). To assess novel peptide identification, we used their scoring and ranking of the match of experimental and theoretical spectra in proteogenomic analysis based on different error-rate estimation methods (Zhang et al., <xref ref-type="bibr" rid="B47">2015</xref>; Li et al., <xref ref-type="bibr" rid="B24">2017</xref>; Aggarwal et al., <xref ref-type="bibr" rid="B1">2022</xref>).</p>
<p>Improving continuous and complementary ion fragments has rarely been applied to novel peptide evaluation. Huesgen et al. (<xref ref-type="bibr" rid="B16">2015</xref>) and Yang et al. (<xref ref-type="bibr" rid="B42">2019</xref>) found that the mirror proteases trypsin and lysargiNase can generate a deeper <italic>b</italic>/<italic>y</italic> ion coverage because of their C- and N-terminal digestion characteristics, which allowed precise <italic>de novo</italic> peptide sequencing in the large-scale proteome. In contrast with LysC and LysN (Raijmakers et al., <xref ref-type="bibr" rid="B33">2010</xref>) and other mirror proteases (Fossati et al., <xref ref-type="bibr" rid="B9">2021</xref>), trypsin and lysargiNase have higher digesting specificity and generate more peptides with less complexity, providing a greater opportunity to increase proteomic coverage.</p>
<p>In addition, we found that in-house developed proteases, acetylated Trypsin (Ac-Trypsin; Wu et al., <xref ref-type="bibr" rid="B41">2016</xref>) and LysargiNase (Ac-LysargiNase; Zhang et al., <xref ref-type="bibr" rid="B46">2019</xref>), demonstrate lower self-digestion, superior stability, and higher activity than their recombinational and commercial products, whether in simple protein substrate or quantitative proteomics studies.</p>
<p>In this study, we proposed mirror proteases, Ac-Trypsin and Ac-LysargiNase, which can rapidly and accurately select and assess novel peptides by dramatically improving both <italic>b</italic> and <italic>y</italic> ion coverage. Compared with 65&#x02013;68% <italic>b</italic> or <italic>y</italic> ion coverage from a single protease, complementary use of Ac-Trypsin and Ac-LysargiNase increased both <italic>b</italic> and <italic>y</italic> ion coverage to 75&#x02013;80%, which allowed us to obtain 368 novel peptides with high-quality spectra in <italic>Mycolicibacterium smegmatis</italic> MC<sup>2</sup> 155. We used these veritable novel peptides to correct six recorded proteins&#x00027; N-termini and identify 17 novel ORFs. Besides improving gene reannotation on <italic>M. smegmatis</italic> MC<sup>2</sup> 155, the conserved homology of novel ORFs can also be reannotated in other closely related genetic species in the family <italic>Mycobacteriaceae</italic>.</p></sec>
<sec sec-type="materials and methods" id="s2">
<title>Materials and methods</title>
<sec>
<title>Strain culture and protein sample preparation</title>
<p><sup>15</sup>NH<sub>4</sub>Cl was purchased from Cambridge Isotope Laboratories, Andover, MA, United States. <italic>M. smegmatis</italic> MC<sup>2</sup> 155 was cultured in an M9 minimal medium with <sup>15</sup>N metabolite labeling described previously (Zhu et al., <xref ref-type="bibr" rid="B49">2022</xref>). Briefly, the <sup>14</sup>NH<sub>4</sub>Cl- and <sup>15</sup>NH<sub>4</sub>Cl-labeled cells were equally mixed and disrupted with a Soniprep sonicator (Scientz, Ningbo, China) for 15 min (2 s-on, 4 s-off) at 30% amplitude. Supernatants were collected after centrifugation at 13,000 rpm for 15 min, and protein concentration was measured by a gel-assisted method as described previously (Zhang et al., <xref ref-type="bibr" rid="B48">2016b</xref>).</p></sec>
<sec>
<title>In-gel Ac-Trypsin and Ac-LysargiNase digestion</title>
<p>To achieve deep coverage of <italic>M. smegmatis</italic> proteome, we reduced 240 &#x003BC;g of proteins with 5 mM of dithiothreitol (DTT) and alkylated them with 20 mM of iodoacetamide (IAA). The alkylated proteins were split into two samples, separated by a 10% SDS-PAGE for 8 cm, and stained with Coomassie Blue G250. Two gel lanes were excised into 13 fractions based on the molecular weight (MW) and protein abundance and digested with Ac-Trypsin (Wu et al., <xref ref-type="bibr" rid="B41">2016</xref>; 12.5 ng/&#x003BC;L) and Ac-LysargiNase (Zhang et al., <xref ref-type="bibr" rid="B46">2019</xref>; 12.5 ng/&#x003BC;L) at 37&#x000B0;C for 12&#x02013;24 h. The extracted peptides were desalted with a homemade C<sub>18</sub> StageTip (Zhai et al., <xref ref-type="bibr" rid="B44">2013</xref>), dried, and dissolved in loading buffer (1% acetonitrile, ACN, and 1% formic acid, FA) for MS analysis.</p></sec>
<sec>
<title>LC-MS/MS analysis</title>
<p>The dissolved peptides (500 ng) were analyzed, as described previously. Briefly, the liquid chromatography-tandem mass spectrometry (LC-MS/MS) consisted of an EASY-nLC 1200 system (Thermo Fisher Scientific, San Jose, CA, United States) equipped with a self-packed capillary column (75 &#x003BC;m i.d. &#x000D7; 15 cm, 3 &#x003BC;m C<sub>18</sub> reversed-phase fused silica) coupled to an Orbitrap Fusion Lumos (Thermo Fisher Scientific). For full MS scans, the automatic gain control (AGC) was set at 5.0 &#x000D7; 10<sup>5</sup>. The scan ranged from 300 to 1,400 <italic>m/z</italic> at a resolution of 1.2 &#x000D7; 10<sup>5</sup> and a maximum injection time (MIT) of 50 ms. For the MS<sub>2</sub> scan, only spectra with a charge state of 2&#x02013;6 were selected for fragmentation by higher energy collision-induced dissociation (HCD) with a normalized collision energy of 32%, an AGC of 1 &#x000D7; 10<sup>5</sup>, and an MIT of 35 ms. The dynamic exclusion was set to 30 s.</p></sec>
<sec>
<title>Database construction and searching</title>
<p>The annotated protein database of <italic>M. smegmatis</italic> MC<sup>2</sup> 155 was downloaded from NCBI (NZ_CP009494, <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/genome/1026?genome_assembly_id=212020">https://www.ncbi.nlm.nih.gov/genome/1026?genome_assembly_id=212020</ext-link>), including 6,385 entries. Using pAnno (Zhang et al., <xref ref-type="bibr" rid="B47">2015</xref>) in pFind software, the six-frame database was constructed based on the complete genome sequence of MC<sup>2</sup> 155 according to a stop-to-stop translating strategy [183,120 entries with at least seven amino acids (AAs)]. Two MS/MS datasets digested with different proteases were analyzed using pFind (v 3.1.5; Chi et al., <xref ref-type="bibr" rid="B5">2018</xref>) to search against the annotated and six-frame databases. For the two digestion modes, trypsin and lysargiNase, a maximum of two missed cleavages was tolerated; 20 ppm was tolerated in MS and MS/MS search modes. Cysteine carbamidomethyl was set as a fixed modification, whereas acetylation of protein N-termini and methionine oxidation were added as variable modifications. A minimal peptide length of 7 AAs was required. The false discovery rates (FDR) for the peptide-spectrum match (PSM), peptide, and protein were set to &#x0003C; 1%. <sup>15</sup>N labeling mode was chosen for novel peptide verification.</p></sec>
<sec>
<title>Novel peptide selection and verification</title>
<p>The identified peptides were of three types: annotated proteins, novel ORFs, and N-terminal corrections according to the encoding gene position. The potential novel peptides mapping to the N-terminal extension of the annotated proteins and the unannotated regions were obtained and evaluated for their accuracy by &#x0201C;mirror proteases&#x0201D; digestion evidence, Q-value, raw score, and <sup>14</sup>N and <sup>15</sup>N spectrum similarity. A Q-value was calculated based on the target-decoy approach. A raw score is the reliability of the PSM. <sup>14</sup>N and <sup>15</sup>N spectrum similarity was calculated by pBuild in pFind software, as described previously (Zhu et al., <xref ref-type="bibr" rid="B49">2022</xref>). In addition, novel peptides with higher, moderate, and lower scores were selected and synthesized for further verification. The spectrum similarity of original and synthesized peptides was checked. A Student&#x00027;s <italic>t</italic>-test was calculated by R (<sup>&#x0002A;&#x0002A;&#x0002A;</sup><italic>P</italic> &#x0003C; 0.001, <sup>&#x0002A;&#x0002A;</sup><italic>P</italic> &#x0003C; 0.01, <sup>&#x0002A;</sup><italic>P</italic> &#x0003C; 0.05).</p></sec>
<sec>
<title>Reanalysis of public RNA-seq and Ribo-seq datasets</title>
<p>The public RNA-seq (SRR17866681, 2-Feb-2022) and Ribo-seq (E-MTAB-2929, 1-Jan-2015; Shell et al., <xref ref-type="bibr" rid="B37">2015</xref>) datasets were downloaded from NCBI and ArrayExpress, respectively, and aligned to reference DNA sequence by HISAT2 (v 2.2.1; Kim et al., <xref ref-type="bibr" rid="B19">2019</xref>), and SAMtools (v 1.11; Li et al., <xref ref-type="bibr" rid="B22">2009</xref>). The expression of RNA-seq and Ribo-seq was calculated by Cufflinks (v2.2.1; Trapnell et al., <xref ref-type="bibr" rid="B40">2012</xref>) locally and visualized by IGV (v 2.11.1; Thorvaldsdottir et al., <xref ref-type="bibr" rid="B39">2013</xref>) and R (v 4.1.2).</p></sec>
<sec>
<title>N-terminal labeling and negative enrichment</title>
<p>A total of 150 &#x003BC;g proteins were reduced by 10 mM dithiothreitol (DTT) at 45&#x000B0;C for 60 min and alkylated by 20 mM IAA for 45 min at room temperature. Then, 40 mM formaldehyde (Sigma-Aldrich, Saint Louis, MO, United States) and 20 mM sodium cyanoborohydride (Sigma-Aldrich) were added to the above protein sample and incubated at 37&#x000B0;C overnight. A solution of 1 M Tris (pH 6.8) with a final concentration of 0.1 M was added to quench the reaction.</p>
<p>The dimethylated proteins were digested with Ac-Trypsin at a protein/enzyme ratio of 50:1 (w/w) at 37&#x000B0;C for 14 h. The high molecular weight dendritic hyperbranched polyglycerol-aldehydes (HPG-ALD, Vancouver, BC, Canada; Kleifeld et al., <xref ref-type="bibr" rid="B20">2010</xref>) polymer was added to the peptide sample. 20 mM sodium cyanoborohydride was immediately added and incubated overnight at 37&#x000B0;C. The naturally blocked and experimentally labeled N-terminal peptides were collected by ultrafiltration with the 30 kDa MWCO Amicon column (Sartorius, Gottingen, Germany).</p>
<p>The N-terminal peptide sample was fractionated by the RP-Tip (Reverse-Phase Tip; Ni et al., <xref ref-type="bibr" rid="B31">2019</xref>) with an increasing acetonitrile step-gradient (6, 9, 12, 15, 18, 21, 25, 30, 35, and 50%, pH = 10). These 10 fractions were combined into six samples (6 &#x0002B; 25%, 9 &#x0002B; 30%, 12 &#x0002B; 35%, 15%, 18%, and 21 &#x0002B; 50%) and dried for LC-MS/MS analysis.</p>
<p>Six raw files were searched against the six-frame database of <italic>M. smegmatis</italic> MC<sup>2</sup> 155 with pFind. The parameters of database searching were as follows: (1) a maximum of three missed cleavages were tolerated; (2) semi-specific cleavage was set; (3) 20 ppm was tolerated in MS and MS/MS search modes; (4) cysteine carbamidomethyl and dimethyl at lysine were set as fixed modifications; (5) acetylation of any N-termini, dimethylation at any N-terminus, pyroglutamate at glutamate and glutamine, and oxidation of methionine were added as variable modifications; (6) a minimal peptide length of 7 AAs was required; and (7) the false discovery rates (FDR) for the peptide-spectrum match (PSM), peptide, and protein were set to &#x0003C; 1%.</p></sec>
<sec>
<title>Correction of N-termini</title>
<p>The N-terminal-derived sequences were verified by a comparative genomic approach.</p>
<p>Firstly, the newly translated stop-to-stop protein sequences were compared with NCBI publicly annotated sequences using BLASTP. Secondly, the potential expression at the transcriptional and translational levels was checked by the public RNA-seq and Ribo-seq datasets of <italic>M. smegmatis</italic> MC<sup>2</sup> 155, respectively. Thirdly, the potential start codon was confirmed based on the Ribo-seq and direct evidence of N-terminal labeling with dimethylation.</p></sec>
<sec>
<title>Verification of novel ORFs</title>
<p>For novel ORFs, novel peptides were assessed by continuous <italic>b/y</italic> pairs and spectra quality. All potential novel encoding ORFs were rechecked in their annotating state with the publicly available annotated proteins in the NCBI database by BLASTP. The matched orthologous protein sequences were used to construct neighbor-joining (NJ) phylogenetic trees of the novel ORFs encoding proteins by MEGA software (v 10.1.8; Kumar et al., <xref ref-type="bibr" rid="B21">2018</xref>). To observe the conservation of novel ORFs, we used their nucleotide sequences to compare them with 207 reference strains in <italic>Mycobacteriaceae</italic> by local BLASTN analysis. Sequences sharing at least 60% sequence coverage and 70% identity with other genetically distant species belong to conserved genes and can also be used for reannotation for their homologous genes, as described previously (Gallien et al., <xref ref-type="bibr" rid="B10">2009</xref>).</p></sec></sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec>
<title>Experiment design</title>
<p>To achieve deep coverage of proteomics for <italic>M. smegmatis</italic> MC<sup>2</sup> 155, we separated two same samples by a 10% SDS-PAGE gel (8 cm) and digested by Ac-Trypsin (Wu et al., <xref ref-type="bibr" rid="B41">2016</xref>) and Ac-LysargiNase (Yang et al., <xref ref-type="bibr" rid="B42">2019</xref>; Zhang et al., <xref ref-type="bibr" rid="B46">2019</xref>) with high activity and stability (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1</xref>). The extracted peptides were detected by an Orbitrap Fusion Lumos mass spectrometer under the same conditions. The raw files from different proteases were searched against the annotated database for evaluating protein coverage and the six-frame database for selecting novel peptides. To evaluate the contribution of mirror proteases to the accurate identification of novel peptides, we extracted the merging spectra from mirror peptides and calculated the coverage of the <italic>b</italic>/<italic>y</italic> ion series. Here, mirror peptides were the overlapped sequences digested simultaneously by Ac-Trypsin and Ac-LysargiNase. In addition, two or more unique peptides from the same novel events were also conserved because of different digesting characteristics. These novel peptides were further assessed by spectra score prediction and their <sup>15</sup>N-labeling spectra matching. All verified novel peptides were used for the genomic reannotation of <italic>M. smegmatis</italic> MC<sup>2</sup> 155 (<xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Workflow of this study. Our in-house mirror proteases, Ac-Trypsin and Ac-LysargiNase, were used to assess the authenticity of novel peptides in a large-scale proteogenomic study based on <sup>14</sup>N- and <sup>15</sup>N-labeling cells of <italic>M. smegmatis</italic> MC<sup>2</sup> 155. The verified novel peptides were further used for novel event analysis, including N-termini corrections and novel ORF identifications.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1015140-g0001.tif"/>
</fig></sec>
<sec>
<title>Large-scale mirror proteome of <italic>M. smegmatis</italic> by Ac-Trypsin and Ac-LysargiNase</title>
<p>In <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 2</xref>, Ac-Trypsin and Ac-LysargiNase showed higher specificity with more than 95% based on the annotated protein database searching, which also resulted in the identification of more fully digested peptides. In Ac-Trypsin digestion datasets, 47,324 peptides were detected from 4,442 protein groups and 4,102 unique proteins (<xref ref-type="supplementary-material" rid="SM2">Supplementary Table 1</xref>). In the Ac-LysargiNase datasets, 31,551 peptides were from 3,871 protein groups and 3,602 unique proteins (<xref ref-type="supplementary-material" rid="SM3">Supplementary Table 2</xref>). The average ratio of the PSM count per peptide and peptides per protein group was more than three and eight (<xref ref-type="fig" rid="F2">Figure 2A</xref>). Among these peptides, 23,175 (40.94%) were mirror peptides, 24,141 unique peptides were from Ac-Trypsin, and 8,748 peptides from Ac-LysargiNase digests (<xref ref-type="fig" rid="F2">Figure 2B</xref>, <xref ref-type="supplementary-material" rid="SM4">Supplementary Table 3</xref>). At the protein level, 3,478 (78.46%) were shared in two different datasets (<xref ref-type="fig" rid="F2">Figure 2C</xref>), indicating that a larger complement and verification datasets were obtained. The number of non-redundant proteins increased quite steeply and fast with the addition of gel fraction, suggesting the high separation resolution of the gel we used in this study (<xref ref-type="fig" rid="F2">Figure 2D</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1</xref>). The total number of unique proteins rose to 4,102 and 3,602 for the Ac-Trypsin and Ac-LysargiNase datasets, which accounted for 64.24 and 56.41% of the total annotated proteins for <italic>M. smegmatis</italic> MC<sup>2</sup> 155. 
In addition, the average protein sequence coverage increased from 27.59% of Ac-LysargiNase and 39.15% of Ac-Trypsin to 42.99% when two separation methods were introduced (<xref ref-type="fig" rid="F2">Figure 2E</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 3</xref>), indicating a high overlap region of the sequenced peptides and high specificity as well as activity of both proteases we applied in this study.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Mirror proteases helpfully improved the proteome coverage of <italic>M. smegmatis</italic> MC<sup>2</sup> 155 based on the annotated database searching. <bold>(A)</bold> The application of Ac-Trypsin and Ac-LysargiNase in proteomics of <italic>M. smegmatis</italic> MC<sup>2</sup> 155. Comparison of the identified peptides <bold>(B)</bold> and proteins <bold>(C)</bold> by Ac-Trypsin and Ac-LysargiNase. <bold>(D)</bold> Protein identification saturation using gel-separation methods by Ac-Trypsin and Ac-LysargiNase. <bold>(E)</bold> Comparison of the protein sequence coverage by single protease and combined mirror proteases. <bold>(F)</bold> The proportion of the N-terminal and C-terminal spectra in each fraction from Ac-Trypsin and Ac-LysargiNase digests. <bold>(G)</bold> Comparison of the ion coverages from single protease and combined mirror proteases for annotated peptides. Statistically significant differences by student&#x00027;s <italic>t</italic>-test are indicated for <italic>p</italic> values of <sup>&#x0002A;</sup><italic>p</italic> &#x0003C; 0.05, <sup>&#x0002A;&#x0002A;</sup><italic>p</italic> &#x0003C; 0.01, and <sup>&#x0002A;&#x0002A;&#x0002A;</sup><italic>p</italic> &#x0003C; 0.001.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1015140-g0002.tif"/>
</fig>
<p>Due to the cleavage characteristics of proteases, Ac-Trypsin and Ac-LysargiNase digested proteins included significantly more N-terminal and C-terminal peptides, respectively (<xref ref-type="fig" rid="F2">Figure 2F</xref>, <italic>P</italic> &#x0003C; 0.001; Huesgen et al., <xref ref-type="bibr" rid="B16">2015</xref>). The spectra from these Ac-Trypsin and Ac-LysargiNase digesting peptides had strong <italic>y</italic> and <italic>b</italic> series ions, which led to different <italic>y</italic> and <italic>b</italic> ion coverages for both Ac-Trypsin and Ac-LysargiNase, respectively. The median of <italic>b</italic> and <italic>y</italic> ion coverages was 29.70 and 74.69% for the Ac-Trypsin spectra, respectively, and 69.37 and 37.01% for the Ac-LysargiNase spectra. In comparison, both <italic>y</italic> and <italic>b</italic> ion coverages were improved to &#x0003E;80% when we combined two proteases (<xref ref-type="fig" rid="F2">Figure 2G</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 3</xref>). We further investigated <italic>b</italic> and <italic>y</italic> ion coverages at different positions of all identified peptides. We found that Ac-Trypsin and Ac-LysargiNase spectra could provide complementary ions for whole peptides, including N-terminal, middle, and C-terminal AAs (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 4</xref>). These results support the high quality and deep coverage of <italic>M. smegmatis</italic> proteome generated by mirror proteases, providing verifiable MS evidence and an opportunity to identify novel peptides.</p></sec>
<sec>
<title>Ac-Trypsin and Ac-LysargiNase efficiently identified novel peptides</title>
<p>After the six-frame database search, 845 and 521 novel peptides were identified from Ac-Trypsin and Ac-LysargiNase digests (<xref ref-type="fig" rid="F3">Figure 3A</xref>, <xref ref-type="supplementary-material" rid="SM5">Supplementary Tables 4&#x02013;6</xref>). The average ratios for the number of PSMs per novel peptide and novel peptides per protein group were 1.78 and 1.22, which were significantly lower than those of annotated gene products for either Ac-Trypsin or Ac-LysargiNase digested proteome samples. In total, 126 and 119 peptides from 845 Ac-Trypsin and 521 Ac-LysargiNase digests were mirror peptides, respectively. Among them, 111 truncated sequences have the same AAs consistently from Ac-Trypsin and Ac-LysargiNase digests, excepting N- or C-terminal AAs (<xref ref-type="supplementary-material" rid="SM7">Supplementary Table 6</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 5</xref>), which resulted in the identification of 74 shared proteins (<xref ref-type="fig" rid="F3">Figure 3B</xref>).</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Mirror proteases efficiently identified credible novel peptides. <bold>(A)</bold> The proteogenomic identification is based on the six-frame database of <italic>M. smegmatis</italic> MC<sup>2</sup> 155. <bold>(B)</bold> Venn diagram of the proteins identified from the Ac-Trypsin and Ac-LysargiNase datasets. <bold>(C)</bold> Comparison of the ion coverages from single protease and combined mirror proteases for novel peptides. Comparison of the <italic>Q</italic>-value <bold>(D)</bold> and raw score <bold>(E)</bold> spectra with single protease and mirror proteases digesting evidence from the Ac-Trypsin and Ac-LysargiNase datasets.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1015140-g0003.tif"/>
</fig>
<p>As with annotated peptides, Ac-Trypsin and Ac-LysargiNase mirror spectra provided complementary <italic>b</italic> and <italic>y</italic> ions with about 80% coverage for novel peptides compared with those of Ac-Trypsin (<italic>b</italic>-ion, 37.88%; <italic>y</italic>-ion, 68.25%) or Ac-LysargiNase (<italic>b</italic>-ion, 65.49%; <italic>y</italic>-ion, 39.60%; <xref ref-type="fig" rid="F3">Figure 3C</xref>). To further evaluate the accuracy of these novel peptides, we compared the <italic>Q</italic>-value and raw score of all spectra with single protease and mirror protease evidence. In <xref ref-type="fig" rid="F3">Figure 3D</xref>, the average <italic>Q</italic>-value of spectra for mirror peptides was 4.91 times lower than that from Ac-Trypsin (single protease, 0.0035; mirror proteases, 0.0007) and 4.22 times lower than that from Ac-LysargiNase (<italic>t</italic>-test <italic>P</italic> &#x0003C; 0.0001; single protease, 0.0044; mirror proteases, 0.0010). Correspondingly, the average raw score of spectra with two proteases digesting evidence was 1.49 times higher than that from Ac-Trypsin (single protease, 6.79; mirror proteases, 10.12) and 1.41 times higher than that from Ac-LysargiNase (<italic>t</italic>-test <italic>P</italic> &#x0003C; 0.0001; single protease, 6.46; mirror proteases, 9.14, <xref ref-type="fig" rid="F3">Figure 3E</xref>). These results strongly supported that the novel peptides digested by mirror proteases were identified with higher confidence than those from the single protease digested samples.</p>
<p>A total of 368 novel mirror peptides were observed and used for proteogenomic analysis. According to the genomic position, these 368 novel peptides were identified from two types of novel events. Among them, 145 peptides were localized to the N-termini extension regions of 40 annotated genes, and the other 223 peptides from 34 ORFs were in the unannotated regions of <italic>M. smegmatis</italic> MC<sup>2</sup> 155 (<xref ref-type="supplementary-material" rid="SM7">Supplementary Table 6</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 6A</xref>). To evaluate the annotation of these novel peptides, we compared their protein sequences to those in the NCBI nr database of other <italic>M. smegmatis</italic> strains by BLASTP. Results showed that as many as 126 novel peptides from N-termini extension and 185 from novel ORFs were annotated in other annotation versions or the other strains of <italic>M. smegmatis</italic>, implying that more than 80% of novel peptides were from encoding regions of other <italic>M. smegmatis</italic> known strains. The remaining 19 and 38 peptides (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 6B,C</xref>) corresponded to six newly calibrated N-termini and 17 newly identified ORFs (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 6D,E</xref>).</p>
<p>Among the above mentioned 19 N-terminal novel peptides, 12 sequences were mirror peptides, and the other seven peptides were different products digested by mirror proteases (<xref ref-type="supplementary-material" rid="SM8">Supplementary Table 7</xref>). Among the other 38 novel peptides, 14 sequences belong to mirror peptides, one lysargiNase digested sequence was overlapped with another non-fully tryptic peptide, and the other 22 peptides were different products from mirror proteases digests (<xref ref-type="supplementary-material" rid="SM9">Supplementary Table 8</xref>). Complementary use of both proteases also increased <italic>b</italic>/<italic>y</italic> ion coverage and a sequence coverage of novel events (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 6B,C</xref>). These results imply that mirror proteases provide a higher quality of novel peptides in genome reannotation.</p></sec>
<sec>
<title>Novel peptides from mirror proteases efficiently corrected N-termini of 6 annotated proteins</title>
<p>In the 19 novel N-terminal peptides from six annotated proteins identified from the complementary use of both proteases (<xref ref-type="fig" rid="F4">Figure 4A</xref>, <xref ref-type="supplementary-material" rid="SM8">Supplementary Table 7</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 6B</xref>), one ORF contained eight novel peptides, one ORF contained three peptides, and the other four ORFs each contained two peptides. Among them, 12 novel peptides were detected with essentially identical <sup>14</sup>N and <sup>15</sup>N spectra (cosine similarity &#x0003E;0.9; <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 7</xref>). For orf|0|-|626711-627743| and orf|0|&#x0002B;|3349519-3351547|, novel peptides were distributed in front of the start codon GTG (V) and ATG (M) of previously annotated genes. For the other four ORFs, novel peptides covered the area of the originally erroneous start codons. Both mirror N-terminal novel sequences and <sup>14</sup>N/<sup>15</sup>N double labeling spectra confirmed the authenticity of the N-termini correction for six recorded genes by Ac-Trypsin and Ac-LysargiNase (<xref ref-type="fig" rid="F4">Figure 4A</xref>).</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>N-termini correction of 6 annotated proteins. <bold>(A)</bold> Novel peptides distribution in N-terminal extension regions. Peptide labeled star stands for <sup>14</sup>N and <sup>15</sup>N labeling spectra identification. The confirmed spectra of N-terminal peptides with <sup>14</sup>N and <sup>15</sup>N labeling forms were derived from the Ac-Trypsin <bold>(B)</bold> and Ac-LysargiNase <bold>(C)</bold> datasets, respectively. <bold>(D)</bold> The spectra of an N-termini labeled peptide with dimethyl modification from our N-terminomic dataset.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1015140-g0004.tif"/>
</fig>
<p>For example, both peptides (L)SVDTAAPSAVELAGAVTEQLR and (L)SVDTAAPSAVELAGAVTEQL(R) contained as many as 21 AA residues. These two peptides covered the original start codon GTG (V) of the annotated protein WP_011729667.1 for polyprenyl synthetase family protein (<xref ref-type="fig" rid="F4">Figures 4B,C</xref>). As these two peptides are quite long, none of them were sequenced with either full <italic>y</italic> series or full <italic>b</italic> series product ions. Complementary with both Ac-Trypsin and Ac-LysargiNase, the sequences of all AA residues were confirmed with at least one product ion. These results proved the correctness and uniqueness of the peptide we identified. In addition to <sup>14</sup>N-labeling spectra, their corresponding <sup>15</sup>N-labeling forms were also identified as high quality. The <sup>14</sup>N and <sup>15</sup>N spectra were essentially the same, with cosine similarity values of &#x02248;1.00 for the two peptide pairs generated from Ac-Trypsin and Ac-LysargiNase. Further, we even identified one dimethyl-labeled N-termini peptide starting with a serine residue with a high-quality spectrum by a dimethylation labeling combined with a negative enrichment strategy (<xref ref-type="fig" rid="F4">Figure 4D</xref>), <sup><bold>&#x0002A;</bold></sup>SVDTAAPSAVELAGAVTEQLR, which included the previously erroneous codon-terminal valine. These results strongly support that the true start codon was CTG for leucine in front of the dimethyl-labeled serine residue considering the characteristics of aminopeptidase (<bold>L</bold><sup><bold>&#x0002A;</bold></sup>S<bold>V</bold>DTAAPSAVELAGAVTEQLR, <xref ref-type="fig" rid="F4">Figure 4D</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 7</xref>).</p>
<p>In addition to mirror peptides, the novel N-terminal events were efficiently confirmed with different peptides identified from the Ac-Trypsin, and Ac-LysargiNase digested proteome. In the case of orf|0|&#x0002B;|3349519-3351547|, we identified two fully digested peptides, (R)GADHDVLR with three continuous <italic>b</italic>/<italic>y</italic> pairs and (L)RVQPDVGEL(R) with five <italic>b</italic>/<italic>y</italic> pairs, in N-terminal extension region of the recorded protein WP_011728939.1 for ABC transporter ATP-binding protein. Although its N-termini cannot be determined, the true start codon and a potential reason for this mis-annotation are worth elucidating.</p>
<p>In this study, we found erroneous N-termini in six annotated genes. Among them, three were previously annotated at the canonical start codon ATG, whereas the other three were at the non-canonical start codon GTG. Gallien et al. previously found overprediction of ATG and other non-canonical codons as the start codon using a gene prediction program by TMPP labeling strategy in <italic>M. smegmatis</italic> MC<sup>2</sup> 155 (Gallien et al., <xref ref-type="bibr" rid="B10">2009</xref>). Kelkar et al. and we also observed similar phenomena in <italic>M. tuberculosis</italic> (Kelkar et al., <xref ref-type="bibr" rid="B18">2011</xref>; Shi et al., <xref ref-type="bibr" rid="B38">2022</xref>). Other optimized multi-omic technologies, such as Ribo-RET (Meydan et al., <xref ref-type="bibr" rid="B29">2019</xref>; <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 7</xref>) and RNA-seq, also provided corresponding expression evidence (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 8</xref>), which further supported our finding through the complementary use of Ac-Trypsin and Ac-LysargiNase for deep coverage proteomics. On the other hand, as shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 7</xref>, deep N-terminal proteomics would be the direct evidence for confirming the starting sites of encoding ORFs.</p></sec>
<sec>
<title>Mirror proteases helped in the identification of 17 novel ORFs</title>
<p>Of the identified 17 novel ORFs with 38 new peptides, six ORFs were detected with mirror peptides, which accounted for more than 35% of all novel ORFs identified in this study. Among them, one ORF contained four novel peptides, two ORFs contained three peptides, and the other 14 ORFs contained two peptides, indicating the high credibility of these new genes (<xref ref-type="fig" rid="F5">Figure 5A</xref>, <xref ref-type="supplementary-material" rid="SM9">Supplementary Table 8</xref>). All the novel mirror peptides identified from these novel ORFs were specifically digested products. Among them, four tryptic peptides could also be identified from Blackburn (Potgieter et al., <xref ref-type="bibr" rid="B32">2016</xref>) and Blackburn&#x00027;s (Giddey et al., <xref ref-type="bibr" rid="B12">2017</xref>) datasets after the six-frame database search (<xref ref-type="supplementary-material" rid="SM9">Supplementary Table 8</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 10</xref>), further confirming the credibility of these identifications in this study. Surprisingly, we even noticed that orf|0|&#x0002B;|813403-813805| showed quite a high abundance, ranking top 163 of all identified 6,596 genes at the mRNA level (<xref ref-type="fig" rid="F5">Figure 5B</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 12</xref>). In contrast, most of the novel ORFs had much lower transcription abundance, which may suggest the deep coverage of the mirror proteome datasets. The reason for the missing annotation remains to be studied.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Verification of 17 novel ORFs. <bold>(A)</bold> novel peptide distribution in non-coding regions. Peptide labeled star stands for <sup>14</sup>N and <sup>15</sup>N labeling spectra identification. <bold>(B)</bold> FPKM rank of annotated and novel ORFs based on a public RNA-seq dataset. <bold>(C)</bold> Distribution of novel peptides digested by Ac-Trypsin and Ac-LysargiNase in orf|0|&#x0002B;|1060315-1061149|. The underlined sequences were identified as peptides derived from Ac-Trypsin and Ac-LysargiNase digestion in this study. The blue and orange arrows indicate the Ac-Trypsin and Ac-LysargiNase cleavage sites, respectively. The credible spectra of two different peptides, (R)DVAQVVGHQHGR and RDVAQVVGHQHG(R), from the Ac-Trypsin <bold>(D)</bold> and Ac-LysargiNase <bold>(E)</bold> dataset.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1015140-g0005.tif"/>
</fig>
<p>Mirror proteases also helped identify novel unannotated ORFs with high confidence. For example, four fully digested peptides were derived from the novel protein orf|0|&#x0002B;|1060315-1061149|, including a pair of mirror peptides, (R)DVAQVVGHQHGR and RDVAQVVGHQHG(R), and other two tryptic peptides, (R)DLAGLGVGR and (R)DREHQTDPVDAQR&#x02014;with at least three continuous <italic>b</italic>/<italic>y</italic> ion pairs (<xref ref-type="fig" rid="F5">Figure 5C</xref>).</p>
<p>For mirror peptides, (R)DVAQVVGHQHGR and RDVAQVVGHQHG(R) were identified by 3 and 4 <italic>b</italic>/<italic>y</italic> ion pairs in Ac-Trypsin and Ac-LysargiNase digests (<xref ref-type="fig" rid="F5">Figures 5D,E</xref>). However, for their merged theoretical peptide, RDVAQVVGHQHGR, five <italic>b</italic>/<italic>y</italic> ion pairs were efficiently matched with eight continuous <italic>b</italic> and <italic>y</italic> ions by combining Ac-LysargiNase and Ac-Trypsin digestion (<xref ref-type="fig" rid="F5">Figures 5D,E</xref>). In addition, the mirror peptide, RDVAQVVGHQHG(R), was identified with identical <sup>14</sup>N and <sup>15</sup>N spectra (cosine similarity &#x02248; 1.00). The sequenced AAs were 34 and accounted for 12.2% of the total stop-to-stop translating protein sequence. Most importantly, public Ribo-seq observed the confirmed translation initiation site (TIS) reads (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 9</xref>) at the first leucine in orf|0|&#x0002B;|1060315-1061149| protein sequence, implying CTA (L) might be its start codon. The above evidence not only supported the confident identification but also indicated the N-termini of this novel ORF.</p>
<p>In one extreme case, our datasets detected two different peptides with identical <sup>14</sup>N and <sup>15</sup>N spectra from the novel gene orf|0|&#x0002B;|813403-813805|. The fully tryptic peptide (R)VATPGDSDASAQIEGLR was also identified from the Blackburn (Giddey et al., <xref ref-type="bibr" rid="B12">2017</xref>) dataset, which is the mirror peptide of RVATPGDSDASAQIEGL(R) from our dataset. In addition, two other fully tryptic peptides, (R)LQQESEAFR and (R)WVTVVADALNSASSSGR, were detected in Blackburn&#x00027;s (Potgieter et al., <xref ref-type="bibr" rid="B32">2016</xref>) and Blackburn (Giddey et al., <xref ref-type="bibr" rid="B12">2017</xref>) work (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 11A&#x02013;C</xref>). Even more importantly, we successfully identified its N-terminal peptide (V)AAAGLAWAVSR with dimethyl modification at the first alanine in our dimethylation-labeling N-terminomic dataset (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 11D</xref>) and observed its expression signal at its TIS regions in a public Ribo-seq dataset, implying that the true start codon was GTC (V) in front of an N-terminal peptide and V might be removed by aminopeptidase (Gonzales and Robert-Baudouy, <xref ref-type="bibr" rid="B14">1996</xref>). Thus, we efficiently identified N- and C-terminal peptides of orf|0|&#x0002B;|813403-813805| and confirmed its encoding sequence was 99 AAs. In total, six unique peptides were accurately identified from this novel small ORF, which covered 71.72% of its protein sequence. Together with the high transcription abundance, these results suggest the high abundance of this small novel ORF in <italic>M. smegmatis</italic> MC<sup>2</sup> 155 proteome, which might play an important role in the biological process.</p>
<p>Unlike lysargiNase, trypsin-generated N-terminal peptides had C-terminal lysine or arginine residues. We identified 3 N-terminal peptides, (<sup>&#x0002A;</sup>)AVDVQHASVAVGARAALGAHR and (<sup>&#x0002A;</sup>)AVDVQHASVAVGA(R) from orf|0|-|4966072-4966744| with 224 AAs, and (<sup>&#x0002A;</sup>)PLVAPHPGDPAVVVGTLAAVQVEYVR from orf|0|&#x0002B;|1852179-1853043| with 288 AAs. (<sup>&#x0002A;</sup>)AVDVQHASVAVGA(R) was an uncompleted mirror peptide of (<sup>&#x0002A;</sup>)AVDVQHASVAVGARAALGAHR. These two different N-terminal peptides were derived from Ac-Trypsin, and Ac-LysargiNase digests, respectively. For novel ORF orf|0|&#x0002B;|1852179-1853043|, the other fully digested peptide RLGADEGIGLG(R) was also identified in Ac-LysargiNase digests. This spectra evidence supported the highly confident identification of novel ORFs and indicated the application of mirror proteases in genome re-annotation, even for unannotated ORFs.</p></sec>
<sec>
<title>Homologous genes of novel ORFs did not annotate in other recorded strains</title>
<p>To investigate the sequence similarity of 17 novel ORFs, we compared their protein sequences with publicly sequenced and annotated strains by BLASTP in the NCBI reference database. Among these ORFs, 6 ORFs matched other annotated proteins with more than 50% coverage and 40% identity, six ORFs only matched less sequence with lower similarity, and five did not show any homology with recorded proteins in NCBI databases (<xref ref-type="supplementary-material" rid="SM9">Supplementary Table 8</xref>). Among six higher similarity ORFs, orf|0|&#x0002B;|4368726-4369110| might be a 2-oxoacid dehydrogenases acyltransferase family protein, orf|0|&#x0002B;|3841204-3842056| might be an ion transporter based on the function annotation, while other four ORFs were for hypothetical proteins.</p>
<p>Further BLASTN analysis of these novel ORFs found that 13 ORFs showed higher coverage ranging from 95 to 100% and similarity ranging from 85.71 to 93.57% with their ortholog genes in <italic>Mycolicibacterium goodii</italic>. Although 13 novel ORFs showed higher homology with those of genetically closer species, they have not been annotated in these strains so far.</p>
<p>The other 4 ORFs showed lower identity with NCBI sequenced strains. The orf|0|-|1217294-1218644| had 100% sequence coverage and 78.06% identity with <italic>Nocardioides okcheonensis</italic> MMS20-HV4-12, the orf|0|&#x0002B;|5885997-5886810| had 80% sequence coverage and 67.71% identity with <italic>Mycobacterium fortuitum</italic> subsp. <italic>fortuitum</italic> DSM 46621, while orf|0|&#x0002B;|813403-813805| and orf|0|-|5450187-5451333| showed very low coverage and identity with those from the public strains (<xref ref-type="supplementary-material" rid="SM9">Supplementary Tables 8</xref>, <xref ref-type="supplementary-material" rid="SM10">9</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 13A&#x02013;D</xref>).</p>
<p>Among 17 novel ORFs, four genes were highly conserved in almost all type strains of 5 genera from the family <italic>Mycobacteriaceae</italic>, including orf|0|&#x0002B;|3841204-3842056|, orf|0|&#x0002B;|1060315-1061149|, orf|0|&#x0002B;|4368726-4369110|, and orf|0|&#x0002B;|5658318-5659182|, implying that these four novel ORFs may be <italic>M. smegmatis</italic> species-specific genes.</p></sec></sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>Accurate identification of novel peptides in proteogenomics is hindered by inflated databases, whether in genomic reannotation (Mitchell et al., <xref ref-type="bibr" rid="B30">2018</xref>; Martinez et al., <xref ref-type="bibr" rid="B27">2020</xref>; Yu et al., <xref ref-type="bibr" rid="B43">2021</xref>) or mutated peptides in precision medicine research (Gao et al., <xref ref-type="bibr" rid="B11">2019</xref>; Gillette et al., <xref ref-type="bibr" rid="B13">2020</xref>; Cao et al., <xref ref-type="bibr" rid="B3">2021</xref>; Satpathy et al., <xref ref-type="bibr" rid="B36">2021</xref>). Various FDR filtering strategies have been proposed and applied to evaluate the reliability of novel peptides (Li et al., <xref ref-type="bibr" rid="B23">2016</xref>, <xref ref-type="bibr" rid="B24">2017</xref>), while there are still many challenges in identifying and verifying novel peptides in large-scale proteogenomic studies (Aggarwal et al., <xref ref-type="bibr" rid="B1">2022</xref>). In this study, we proposed using mirror proteases of Ac-Trypsin and Ac-LysargiNase to create complementary <italic>b</italic>/<italic>y</italic> ion pairs of identified peptides to identify unannotated gene products confidently. Based on these mirror spectrum characteristics, Ac-Trypsin and Ac-LysargiNase provide strong evidence for novel peptides in proteogenomic analysis, which can be applied to all prokaryotic and eukaryotic samples.</p>
<p>As described previously (Huesgen et al., <xref ref-type="bibr" rid="B16">2015</xref>), the median coverages of the <italic>b</italic> and <italic>y</italic> series of ions were only 47&#x02013;69% in Ac-LysargiNase or Ac-Trypsin digests, whereas the coverage of both <italic>b</italic> and <italic>y</italic> ions from the combination of Ac-LysargiNase and Ac-Trypsin was increased to 75&#x02013;84%, which provided extensive evidence for accurately and confidently identifying novel peptides in <italic>M. smegmatis</italic> MC<sup>2</sup> 155. The same strategy can be applied in other proteogenomic studies to accurately and confidently identify unannotated genes missing from the traditional genomics study.</p>
<p>To confirm the accuracy of novel mirror peptides, we performed <sup>14</sup>N and <sup>15</sup>N metabolic labeling in cell culture to provide identical light and heavy spectra in one experiment. Results showed that each sequence of novel mirror peptides provided identical <sup>14</sup>N and <sup>15</sup>N-labeling spectra with a cosine similarity score &#x0003E;0.90. To further assess the authenticity of these novel peptides from novel genes or gene events, we used dimethylation labeling technology, peptide synthesis, and public Ribo-seq datasets to check the N-termini of corrected annotated genes or novel ORFs. We reanalyzed public MS datasets for detecting other peptides derived from novel ORFs and compared the expression abundance at the mRNA level. All of these data supported the reliability of these novel events based on our novel mirror peptides.</p>
<p>In a word, the pair of mirror proteases, Ac-Trypsin and Ac-LysargiNase, helped in novel peptide identification and verification, which could be widely used for rapidly and precisely finding unannotated sequence variants in proteogenomic studies.</p>
</sec>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The raw MS/MS data generated in this study were deposited into iProX (<ext-link ext-link-type="uri" xlink:href="https://www.iprox.org/">https://www.iprox.org/</ext-link>) (Ma et al., <xref ref-type="bibr" rid="B26">2019</xref>) with the identifier IPX0003644000 (Ac-Trypsin), IPX0003886000 (Ac-LysargiNase), and IPX0004773001 (N-terminal enrichment). The datasets presented in this study can be found in online <xref ref-type="supplementary-material" rid="SM1">Supplementary materials</xref>.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>PX, YZ, and ZL conceived and designed the experiments. YZ performed <sup>15</sup>N labeling and proteome sample preparation. JS performed an N-terminal enrichment experiment. YL and GW performed MS data detection. SJ performed data analysis with the assistance of ZZ, LC, WW, LY, ED, and LZ. SJ, YZ, and PX wrote the manuscript with the help of all authors. All authors have read and approved the manuscript.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>This study was supported by the Chinese National Basic Research Programs (2020YFE0202200, 2017YFA0505002, and 2017YFA0505700), the National Science Foundation (31901037, 32141003, 31870824, 32071431, and 32070668), the Beijing-Tianjin-Hebei Basic Research Cooperation Project (J200001), the Innovation Foundation of Medicine (AWS17J008, 20SWAQX34, and 19SWAQ17), the Foundation of State Key Laboratory of Proteomics (SKLP-KY201901), and the CAMS Innovation Fund for Medical Sciences (2019-I2M-5-017).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
</body>
<back>
<ack><p>We are grateful to Yuping Xie at the Beijing Proteome Research Center for instrument support and peptide synthesis.</p>
</ack><sec sec-type="supplementary-material" id="s9">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmicb.2022.1015140/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmicb.2022.1015140/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.PDF" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_1.XLSX" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_2.XLSX" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_3.XLSX" id="SM4" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_4.XLSX" id="SM5" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_5.XLSX" id="SM6" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_6.XLSX" id="SM7" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_7.XLSX" id="SM8" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_8.XLSX" id="SM9" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_9.XLSX" id="SM10" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Aggarwal</surname> <given-names>S.</given-names></name> <name><surname>Raj</surname> <given-names>A.</given-names></name> <name><surname>Kumar</surname> <given-names>D.</given-names></name> <name><surname>Dash</surname> <given-names>D.</given-names></name> <name><surname>Yadav</surname> <given-names>A. K.</given-names></name></person-group> (<year>2022</year>). <article-title>False discovery rate: the Achilles&#x00027; heel of proteogenomics</article-title>. <source>Brief. Bioinform.</source> <volume>2022</volume>:<fpage>bbac163</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac163</pub-id><pub-id pub-id-type="pmid">35534181</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ang</surname> <given-names>M. Y.</given-names></name> <name><surname>Low</surname> <given-names>T. Y.</given-names></name> <name><surname>Lee</surname> <given-names>P. Y.</given-names></name> <name><surname>Wan Mohamad Nazarie</surname> <given-names>W. F.</given-names></name> <name><surname>Guryev</surname> <given-names>V.</given-names></name> <name><surname>Jamal</surname> <given-names>R.</given-names></name></person-group> (<year>2019</year>). <article-title>Proteogenomics: from next-generation sequencing (NGS) and mass spectrometry-based proteomics to precision medicine</article-title>. <source>Clin. Chim. Acta</source> <volume>498</volume>, <fpage>38</fpage>&#x02013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1016/j.cca.2019.08.010</pub-id><pub-id pub-id-type="pmid">31421119</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cao</surname> <given-names>L.</given-names></name> <name><surname>Huang</surname> <given-names>C.</given-names></name> <name><surname>Cui Zhou</surname> <given-names>D.</given-names></name> <name><surname>Hu</surname> <given-names>Y.</given-names></name> <name><surname>Lih</surname> <given-names>T. M.</given-names></name> <name><surname>Savage</surname> <given-names>S. R.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Proteogenomic characterization of pancreatic ductal adenocarcinoma</article-title>. <source>Cell</source> <volume>184</volume>, <fpage>5031</fpage>&#x02013;<lpage>5052</lpage> e5026. <pub-id pub-id-type="doi">10.1016/j.cell.2021.08.023</pub-id><pub-id pub-id-type="pmid">34534465</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Castellana</surname> <given-names>N.</given-names></name> <name><surname>Bafna</surname> <given-names>V.</given-names></name></person-group> (<year>2010</year>). <article-title>Proteogenomics to discover the full coding content of genomes: a computational perspective</article-title>. <source>J. Proteomics</source> <volume>73</volume>, <fpage>2124</fpage>&#x02013;<lpage>2135</lpage>. <pub-id pub-id-type="doi">10.1016/j.jprot.2010.06.007</pub-id><pub-id pub-id-type="pmid">20620248</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chi</surname> <given-names>H.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Yang</surname> <given-names>H.</given-names></name> <name><surname>Zeng</surname> <given-names>W. F.</given-names></name> <name><surname>Wu</surname> <given-names>L.</given-names></name> <name><surname>Zhou</surname> <given-names>W. J.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Comprehensive identification of peptides in tandem mass spectra using an efficient open search engine</article-title>. <source>Nat. Biotechnol.</source> <volume>36</volume>, <fpage>1059</fpage>&#x02013;<lpage>1061</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.4236</pub-id><pub-id pub-id-type="pmid">30295672</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>de Souza</surname> <given-names>G. A.</given-names></name> <name><surname>Arntzen</surname> <given-names>M. O.</given-names></name> <name><surname>Fortuin</surname> <given-names>S.</given-names></name> <name><surname>Schurch</surname> <given-names>A. C.</given-names></name> <name><surname>Malen</surname> <given-names>H.</given-names></name> <name><surname>McEvoy</surname> <given-names>C. R.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Proteogenomic analysis of polymorphisms and gene annotation divergences in prokaryotes using a clustered mass spectrometry-friendly database</article-title>. <source>Mol. Cell. Proteomics</source> <volume>10</volume>, <fpage>M110002527</fpage>. <pub-id pub-id-type="doi">10.1074/mcp.M110.002527</pub-id><pub-id pub-id-type="pmid">21030493</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dong</surname> <given-names>L.</given-names></name> <name><surname>Lu</surname> <given-names>D.</given-names></name> <name><surname>Chen</surname> <given-names>R.</given-names></name> <name><surname>Lin</surname> <given-names>Y.</given-names></name> <name><surname>Zhu</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Proteogenomic characterization identifies clinically relevant subgroups of intrahepatic cholangiocarcinoma</article-title>. <source>Cancer Cell</source> <volume>40</volume>, <fpage>70</fpage>&#x02013;<lpage>87</lpage> e15. <pub-id pub-id-type="doi">10.1016/j.ccell.2021.12.006</pub-id><pub-id pub-id-type="pmid">34971568</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ferrarotto</surname> <given-names>R.</given-names></name> <name><surname>Mitani</surname> <given-names>Y.</given-names></name> <name><surname>McGrail</surname> <given-names>D. J.</given-names></name> <name><surname>Li</surname> <given-names>K.</given-names></name> <name><surname>Karpinets</surname> <given-names>T. V.</given-names></name> <name><surname>Bell</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Proteogenomic analysis of salivary adenoid cystic carcinomas defines molecular subtypes and identifies therapeutic targets</article-title>. <source>Clin. Cancer Res.</source> <volume>27</volume>, <fpage>852</fpage>&#x02013;<lpage>864</lpage>. <pub-id pub-id-type="doi">10.1158/1078-0432.CCR-20-1192</pub-id><pub-id pub-id-type="pmid">33172898</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fossati</surname> <given-names>A.</given-names></name> <name><surname>Richards</surname> <given-names>A. L.</given-names></name> <name><surname>Chen</surname> <given-names>K. H.</given-names></name> <name><surname>Jaganath</surname> <given-names>D.</given-names></name> <name><surname>Cattamanchi</surname> <given-names>A.</given-names></name> <name><surname>Ernst</surname> <given-names>J. D.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Toward comprehensive plasma proteomics by orthogonal protease digestion</article-title>. <source>J. Proteome Res.</source> <volume>20</volume>, <fpage>4031</fpage>&#x02013;<lpage>4040</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jproteome.1c00357</pub-id><pub-id pub-id-type="pmid">34319755</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gallien</surname> <given-names>S.</given-names></name> <name><surname>Perrodou</surname> <given-names>E.</given-names></name> <name><surname>Carapito</surname> <given-names>C.</given-names></name> <name><surname>Deshayes</surname> <given-names>C.</given-names></name> <name><surname>Reyrat</surname> <given-names>J. M.</given-names></name> <name><surname>Van Dorsselaer</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>Ortho-proteogenomics: multiple proteomes investigation through orthology and a new MS-based protocol</article-title>. <source>Genome Res.</source> <volume>19</volume>, <fpage>128</fpage>&#x02013;<lpage>135</lpage>. <pub-id pub-id-type="doi">10.1101/gr.081901.108</pub-id><pub-id pub-id-type="pmid">18955433</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gao</surname> <given-names>Q.</given-names></name> <name><surname>Zhu</surname> <given-names>H.</given-names></name> <name><surname>Dong</surname> <given-names>L.</given-names></name> <name><surname>Shi</surname> <given-names>W.</given-names></name> <name><surname>Chen</surname> <given-names>R.</given-names></name> <name><surname>Song</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Integrated proteogenomic characterization of HBV-related hepatocellular carcinoma</article-title>. <source>Cell</source> <volume>179</volume>, <fpage>561</fpage>&#x02013;<lpage>577</lpage> e522. <pub-id pub-id-type="doi">10.1016/j.cell.2019.08.052</pub-id><pub-id pub-id-type="pmid">31730861</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Giddey</surname> <given-names>A. D.</given-names></name> <name><surname>de Kock</surname> <given-names>E.</given-names></name> <name><surname>Nakedi</surname> <given-names>K. C.</given-names></name> <name><surname>Garnett</surname> <given-names>S.</given-names></name> <name><surname>Nel</surname> <given-names>A. J.</given-names></name> <name><surname>Soares</surname> <given-names>N. C.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>A temporal proteome dynamics study reveals the molecular basis of induced phenotypic resistance in <italic>Mycobacterium smegmatis</italic> at sub-lethal rifampicin concentrations</article-title>. <source>Sci. Rep.</source> <volume>7</volume>, <fpage>43858</fpage>. <pub-id pub-id-type="doi">10.1038/srep43858</pub-id><pub-id pub-id-type="pmid">28262820</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gillette</surname> <given-names>M. A.</given-names></name> <name><surname>Satpathy</surname> <given-names>S.</given-names></name> <name><surname>Cao</surname> <given-names>S.</given-names></name> <name><surname>Dhanasekaran</surname> <given-names>S. M.</given-names></name> <name><surname>Vasaikar</surname> <given-names>S. V.</given-names></name> <name><surname>Krug</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Proteogenomic characterization reveals therapeutic vulnerabilities in lung adenocarcinoma</article-title>. <source>Cell</source> <volume>182</volume>, <fpage>200</fpage>&#x02013;<lpage>225</lpage> e235. <pub-id pub-id-type="doi">10.1016/j.cell.2020.06.013</pub-id><pub-id pub-id-type="pmid">32649874</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gonzales</surname> <given-names>T.</given-names></name> <name><surname>Robert-Baudouy</surname> <given-names>J.</given-names></name></person-group> (<year>1996</year>). <article-title>Bacterial aminopeptidases: properties and functions</article-title>. <source>FEMS Microbiol. Rev.</source> <volume>18</volume>, <fpage>319</fpage>&#x02013;<lpage>344</lpage>. <pub-id pub-id-type="doi">10.1111/j.1574-6976.1996.tb00247.x</pub-id><pub-id pub-id-type="pmid">8703509</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Herbst</surname> <given-names>F. A.</given-names></name> <name><surname>Goncalves</surname> <given-names>S. C. L.</given-names></name> <name><surname>Behr</surname> <given-names>T.</given-names></name> <name><surname>McIlroy</surname> <given-names>S. J.</given-names></name> <name><surname>Nielsen</surname> <given-names>P. H.</given-names></name></person-group> (<year>2019</year>). <article-title>Proteogenomic refinement of the <italic>Neomegalonema perideroedes</italic><sup>T</sup> genome annotation</article-title>. <source>Proteomics</source> <volume>19</volume>, <fpage>e1800330</fpage>. <pub-id pub-id-type="doi">10.1002/pmic.201800330</pub-id><pub-id pub-id-type="pmid">30865376</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huesgen</surname> <given-names>P. F.</given-names></name> <name><surname>Lange</surname> <given-names>P. F.</given-names></name> <name><surname>Rogers</surname> <given-names>L. D.</given-names></name> <name><surname>Solis</surname> <given-names>N.</given-names></name> <name><surname>Eckhard</surname> <given-names>U.</given-names></name> <name><surname>Kleifeld</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>LysargiNase mirrors trypsin for protein C-terminal and methylation-site identification</article-title>. <source>Nat. Methods</source> <volume>12</volume>, <fpage>55</fpage>&#x02013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.3177</pub-id><pub-id pub-id-type="pmid">25419962</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jaffe</surname> <given-names>J. D.</given-names></name> <name><surname>Berg</surname> <given-names>H. C.</given-names></name> <name><surname>Church</surname> <given-names>G. M.</given-names></name></person-group> (<year>2004</year>). <article-title>Proteogenomic mapping as a complementary method to perform genome annotation</article-title>. <source>Proteomics</source> <volume>4</volume>, <fpage>59</fpage>&#x02013;<lpage>77</lpage>. <pub-id pub-id-type="doi">10.1002/pmic.200300511</pub-id><pub-id pub-id-type="pmid">14730672</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kelkar</surname> <given-names>D. S.</given-names></name> <name><surname>Kumar</surname> <given-names>D.</given-names></name> <name><surname>Kumar</surname> <given-names>P.</given-names></name> <name><surname>Balakrishnan</surname> <given-names>L.</given-names></name> <name><surname>Muthusamy</surname> <given-names>B.</given-names></name> <name><surname>Yadav</surname> <given-names>A. K.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Proteogenomic analysis of <italic>Mycobacterium tuberculosis</italic> by high resolution mass spectrometry</article-title>. <source>Mol. Cell. Proteomics</source> <volume>10</volume>, <fpage>M111011627</fpage>. <pub-id pub-id-type="doi">10.1074/mcp.M111.011627</pub-id><pub-id pub-id-type="pmid">21969609</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>D.</given-names></name> <name><surname>Paggi</surname> <given-names>J. M.</given-names></name> <name><surname>Park</surname> <given-names>C.</given-names></name> <name><surname>Bennett</surname> <given-names>C.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2019</year>). <article-title>Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype</article-title>. <source>Nat. Biotechnol.</source> <volume>37</volume>, <fpage>907</fpage>&#x02013;<lpage>915</lpage>. <pub-id pub-id-type="doi">10.1038/s41587-019-0201-4</pub-id><pub-id pub-id-type="pmid">31375807</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kleifeld</surname> <given-names>O.</given-names></name> <name><surname>Doucet</surname> <given-names>A.</given-names></name> <name><surname>auf dem Keller</surname> <given-names>U.</given-names></name> <name><surname>Prudova</surname> <given-names>A.</given-names></name> <name><surname>Schilling</surname> <given-names>O.</given-names></name> <name><surname>Kainthan</surname> <given-names>R. K.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>Isotopic labeling of terminal amines in complex samples identifies protein N-termini and protease cleavage products</article-title>. <source>Nat. Biotechnol.</source> <volume>28</volume>, <fpage>281</fpage>&#x02013;<lpage>288</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.1611</pub-id><pub-id pub-id-type="pmid">20208520</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kumar</surname> <given-names>S.</given-names></name> <name><surname>Stecher</surname> <given-names>G.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Knyaz</surname> <given-names>C.</given-names></name> <name><surname>Tamura</surname> <given-names>K.</given-names></name></person-group> (<year>2018</year>). <article-title>MEGA X: molecular evolutionary genetics analysis across computing platforms</article-title>. <source>Mol. Biol. Evol.</source> <volume>35</volume>, <fpage>1547</fpage>&#x02013;<lpage>1549</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msy096</pub-id><pub-id pub-id-type="pmid">29722887</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Handsaker</surname> <given-names>B.</given-names></name> <name><surname>Wysoker</surname> <given-names>A.</given-names></name> <name><surname>Fennell</surname> <given-names>T.</given-names></name> <name><surname>Ruan</surname> <given-names>J.</given-names></name> <name><surname>Homer</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>2078</fpage>&#x02013;<lpage>2079</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id><pub-id pub-id-type="pmid">19505943</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Joh</surname> <given-names>Y. S.</given-names></name> <name><surname>Kim</surname> <given-names>H.</given-names></name> <name><surname>Paek</surname> <given-names>E.</given-names></name> <name><surname>Lee</surname> <given-names>S. W.</given-names></name> <name><surname>Hwang</surname> <given-names>K. B.</given-names></name></person-group> (<year>2016</year>). <article-title>Evaluating the effect of database inflation in proteogenomic search on sensitive and reliable peptide identification</article-title>. <source>BMC Genomics</source> <volume>17</volume>(<supplement>Suppl. 13</supplement>), <fpage>151</fpage>&#x02013;<lpage>162</lpage>. <pub-id pub-id-type="doi">10.1186/s12864-016-3327-5</pub-id><pub-id pub-id-type="pmid">28155652</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Park</surname> <given-names>J.</given-names></name> <name><surname>Kim</surname> <given-names>H.</given-names></name> <name><surname>Hwang</surname> <given-names>K. B.</given-names></name> <name><surname>Paek</surname> <given-names>E.</given-names></name></person-group> (<year>2017</year>). <article-title>Systematic comparison of false-discovery-rate-controlling strategies for proteogenomic search using spike-in experiments</article-title>. <source>J. Proteome Res.</source> <volume>16</volume>, <fpage>2231</fpage>&#x02013;<lpage>2239</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jproteome.7b00033</pub-id><pub-id pub-id-type="pmid">28452485</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Zhou</surname> <given-names>R.</given-names></name> <name><surname>Xu</surname> <given-names>S.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Hong</surname> <given-names>Y.</given-names></name> <name><surname>Lu</surname> <given-names>Q.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Improving gene annotation of the peanut genome by integrated proteogenomics workflow</article-title>. <source>J. Proteome Res.</source> <volume>19</volume>, <fpage>2226</fpage>&#x02013;<lpage>2235</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jproteome.9b00723</pub-id><pub-id pub-id-type="pmid">32367721</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>Wu</surname> <given-names>S.</given-names></name> <name><surname>Yang</surname> <given-names>C.</given-names></name> <name><surname>Bai</surname> <given-names>M.</given-names></name> <name><surname>Shu</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>iProX: an integrated proteome resource</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume>, <fpage>D1211</fpage>&#x02013;<lpage>D1217</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gky869</pub-id><pub-id pub-id-type="pmid">30252093</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Martinez</surname> <given-names>T. F.</given-names></name> <name><surname>Chu</surname> <given-names>Q.</given-names></name> <name><surname>Donaldson</surname> <given-names>C.</given-names></name> <name><surname>Tan</surname> <given-names>D.</given-names></name> <name><surname>Shokhirev</surname> <given-names>M. N.</given-names></name> <name><surname>Saghatelian</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Accurate annotation of human protein-coding small open reading frames</article-title>. <source>Nat. Chem. Biol.</source> <volume>16</volume>, <fpage>458</fpage>&#x02013;<lpage>468</lpage>. <pub-id pub-id-type="doi">10.1038/s41589-019-0425-0</pub-id><pub-id pub-id-type="pmid">31819274</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Menschaert</surname> <given-names>G.</given-names></name> <name><surname>Fenyo</surname> <given-names>D.</given-names></name></person-group> (<year>2017</year>). <article-title>Proteogenomics from a bioinformatics angle: a growing field</article-title>. <source>Mass Spectrom. Rev.</source> <volume>36</volume>, <fpage>584</fpage>&#x02013;<lpage>599</lpage>. <pub-id pub-id-type="doi">10.1002/mas.21483</pub-id><pub-id pub-id-type="pmid">26670565</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meydan</surname> <given-names>S.</given-names></name> <name><surname>Marks</surname> <given-names>J.</given-names></name> <name><surname>Klepacki</surname> <given-names>D.</given-names></name> <name><surname>Sharma</surname> <given-names>V.</given-names></name> <name><surname>Baranov</surname> <given-names>P. V.</given-names></name> <name><surname>Firth</surname> <given-names>A. E.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Retapamulin-assisted ribosome profiling reveals the alternative bacterial proteome</article-title>. <source>Mol. Cell</source> <volume>74</volume>, <fpage>481</fpage>&#x02013;<lpage>493</lpage> e486. <pub-id pub-id-type="doi">10.1016/j.molcel.2019.02.017</pub-id><pub-id pub-id-type="pmid">30904393</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mitchell</surname> <given-names>N. M.</given-names></name> <name><surname>Sherrard</surname> <given-names>A. L.</given-names></name> <name><surname>Dasari</surname> <given-names>S.</given-names></name> <name><surname>Magee</surname> <given-names>D. M.</given-names></name> <name><surname>Grys</surname> <given-names>T. E.</given-names></name> <name><surname>Lake</surname> <given-names>D. F.</given-names></name></person-group> (<year>2018</year>). <article-title>Proteogenomic re-annotation of <italic>Coccidioides posadasii</italic> strain Silveira</article-title>. <source>Proteomics</source> <volume>18</volume>, <fpage>700011</fpage>. <pub-id pub-id-type="doi">10.1002/pmic.2018700011</pub-id><pub-id pub-id-type="pmid">29130603</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ni</surname> <given-names>X.</given-names></name> <name><surname>Tan</surname> <given-names>Z.</given-names></name> <name><surname>Ding</surname> <given-names>C.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Song</surname> <given-names>L.</given-names></name> <name><surname>Yang</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>A region-resolved mucosa proteome of the human stomach</article-title>. <source>Nat. Commun.</source> <volume>10</volume>, <fpage>39</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-018-07960-x</pub-id><pub-id pub-id-type="pmid">30604760</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Potgieter</surname> <given-names>M. G.</given-names></name> <name><surname>Nakedi</surname> <given-names>K. C.</given-names></name> <name><surname>Ambler</surname> <given-names>J. M.</given-names></name> <name><surname>Nel</surname> <given-names>A. J.</given-names></name> <name><surname>Garnett</surname> <given-names>S.</given-names></name> <name><surname>Soares</surname> <given-names>N. C.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Proteogenomic analysis of <italic>Mycobacterium smegmatis</italic> using high resolution mass spectrometry</article-title>. <source>Front. Microbiol.</source> <volume>7</volume>, <fpage>427</fpage>. <pub-id pub-id-type="doi">10.3389/fmicb.2016.00427</pub-id><pub-id pub-id-type="pmid">27092112</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Raijmakers</surname> <given-names>R.</given-names></name> <name><surname>Neerincx</surname> <given-names>P.</given-names></name> <name><surname>Mohammed</surname> <given-names>S.</given-names></name> <name><surname>Heck</surname> <given-names>A. J.</given-names></name></person-group> (<year>2010</year>). <article-title>Cleavage specificities of the brother and sister proteases Lys-C and Lys-N</article-title>. <source>Chem. Commun.</source> <volume>46</volume>, <fpage>8827</fpage>&#x02013;<lpage>8829</lpage>. <pub-id pub-id-type="doi">10.1039/c0cc02523b</pub-id><pub-id pub-id-type="pmid">20953479</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Renuse</surname> <given-names>S.</given-names></name> <name><surname>Chaerkady</surname> <given-names>R.</given-names></name> <name><surname>Pandey</surname> <given-names>A.</given-names></name></person-group> (<year>2011</year>). <article-title>Proteogenomics</article-title>. <source>Proteomics</source> <volume>11</volume>, <fpage>620</fpage>&#x02013;<lpage>630</lpage>. <pub-id pub-id-type="doi">10.1002/pmic.201000615</pub-id><pub-id pub-id-type="pmid">21246734</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ruggles</surname> <given-names>K. V.</given-names></name> <name><surname>Krug</surname> <given-names>K.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Clauser</surname> <given-names>K. R.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Payne</surname> <given-names>S. H.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Methods, tools and current perspectives in proteogenomics</article-title>. <source>Mol. Cell. Proteomics</source> <volume>16</volume>, <fpage>959</fpage>&#x02013;<lpage>981</lpage>. <pub-id pub-id-type="doi">10.1074/mcp.MR117.000024</pub-id><pub-id pub-id-type="pmid">28456751</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Satpathy</surname> <given-names>S.</given-names></name> <name><surname>Krug</surname> <given-names>K.</given-names></name> <name><surname>Jean Beltran</surname> <given-names>P. M.</given-names></name> <name><surname>Savage</surname> <given-names>S. R.</given-names></name> <name><surname>Petralia</surname> <given-names>F.</given-names></name> <name><surname>Kumar-Sinha</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>A proteogenomic portrait of lung squamous cell carcinoma</article-title>. <source>Cell</source> <volume>184</volume>, <fpage>4348</fpage>&#x02013;<lpage>4371</lpage> e4340. <pub-id pub-id-type="doi">10.1016/j.cell.2021.07.016</pub-id><pub-id pub-id-type="pmid">34358469</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shell</surname> <given-names>S. S.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Lapierre</surname> <given-names>P.</given-names></name> <name><surname>Mir</surname> <given-names>M.</given-names></name> <name><surname>Chase</surname> <given-names>M. R.</given-names></name> <name><surname>Pyle</surname> <given-names>M. M.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Leaderless transcripts and small proteins are common features of the mycobacterial translational landscape</article-title>. <source>PLoS Genet.</source> <volume>11</volume>, <fpage>e1005641</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1005641</pub-id><pub-id pub-id-type="pmid">26536359</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shi</surname> <given-names>J.</given-names></name> <name><surname>Meng</surname> <given-names>S.</given-names></name> <name><surname>Wan</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Jiang</surname> <given-names>S.</given-names></name> <name><surname>Zhu</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Deep N-terminomics of <italic>Mycobacterium tuberculosis</italic> H37Rv extensively correct annotated encoding genes</article-title>. <source>Genomics</source> <volume>114</volume>, <fpage>292</fpage>&#x02013;<lpage>304</lpage>. <pub-id pub-id-type="doi">10.1016/j.ygeno.2021.12.001</pub-id><pub-id pub-id-type="pmid">34915127</pub-id></citation></ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thorvaldsdottir</surname> <given-names>H.</given-names></name> <name><surname>Robinson</surname> <given-names>J. T.</given-names></name> <name><surname>Mesirov</surname> <given-names>J. P.</given-names></name></person-group> (<year>2013</year>). <article-title>Integrative Genomics Viewer (IGV): high-performance genomics data visualization and exploration</article-title>. <source>Brief. Bioinform.</source> <volume>14</volume>, <fpage>178</fpage>&#x02013;<lpage>192</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbs017</pub-id><pub-id pub-id-type="pmid">22517427</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Trapnell</surname> <given-names>C.</given-names></name> <name><surname>Roberts</surname> <given-names>A.</given-names></name> <name><surname>Goff</surname> <given-names>L.</given-names></name> <name><surname>Pertea</surname> <given-names>G.</given-names></name> <name><surname>Kim</surname> <given-names>D.</given-names></name> <name><surname>Kelley</surname> <given-names>D. R.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Differential gene and transcript expression analysis of RNA-seq experiments with TopHat and Cufflinks</article-title>. <source>Nat. Protoc.</source> <volume>7</volume>, <fpage>562</fpage>&#x02013;<lpage>578</lpage>. <pub-id pub-id-type="doi">10.1038/nprot.2012.016</pub-id><pub-id pub-id-type="pmid">22383036</pub-id></citation></ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>F.</given-names></name> <name><surname>Zhao</surname> <given-names>M.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Su</surname> <given-names>N.</given-names></name> <name><surname>Xiong</surname> <given-names>Z.</given-names></name> <name><surname>Xu</surname> <given-names>P.</given-names></name></person-group> (<year>2016</year>). <article-title>Recombinant acetylated trypsin demonstrates superior stability and higher activity than commercial products in quantitative proteomics studies</article-title>. <source>Rapid Commun. Mass Sp.</source> <volume>30</volume>, <fpage>1059</fpage>&#x02013;<lpage>1066</lpage>. <pub-id pub-id-type="doi">10.1002/rcm.7535</pub-id><pub-id pub-id-type="pmid">27003043</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>Y. C.</given-names></name> <name><surname>Zhao</surname> <given-names>M. Z.</given-names></name> <name><surname>Wu</surname> <given-names>F. L.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Xiao</surname> <given-names>W. D.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Precision de novo peptide sequencing using mirror proteases of Ac-LysargiNase and Trypsin for large-scale proteomics</article-title>. <source>Mol. Cell. Proteomics</source> <volume>18</volume>, <fpage>773</fpage>&#x02013;<lpage>785</lpage>. <pub-id pub-id-type="doi">10.1074/mcp.TIR118.000918</pub-id><pub-id pub-id-type="pmid">30622160</pub-id></citation></ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>S.</given-names></name> <name><surname>Yang</surname> <given-names>M.</given-names></name> <name><surname>Xiong</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>Q.</given-names></name> <name><surname>Gao</surname> <given-names>X.</given-names></name> <name><surname>Miao</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Proteogenomic analysis provides novel insight into genome annotation and nitrogen metabolism in <italic>Nostoc</italic> sp. PCC 7120</article-title>. <source>Microbiol. Spectr.</source> <volume>9</volume>, <fpage>e0049021</fpage>. <pub-id pub-id-type="doi">10.1128/Spectrum.00490-21</pub-id><pub-id pub-id-type="pmid">34523988</pub-id></citation></ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhai</surname> <given-names>L.</given-names></name> <name><surname>Chang</surname> <given-names>C.</given-names></name> <name><surname>Li</surname> <given-names>N.</given-names></name> <name><surname>Duong</surname> <given-names>D. M.</given-names></name> <name><surname>Chen</surname> <given-names>H.</given-names></name> <name><surname>Deng</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Systematic research on the pretreatment of peptides for quantitative proteomics using a C<sub>18</sub> microcolumn</article-title>. <source>Proteomics</source> <volume>13</volume>, <fpage>2229</fpage>&#x02013;<lpage>2237</lpage>. <pub-id pub-id-type="doi">10.1002/pmic.201200591</pub-id><pub-id pub-id-type="pmid">23723153</pub-id></citation></ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Liu</surname> <given-names>T.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Payne</surname> <given-names>S. H.</given-names></name> <name><surname>Zhang</surname> <given-names>B.</given-names></name> <name><surname>McDermott</surname> <given-names>J. E.</given-names></name> <etal/></person-group>. (<year>2016a</year>). <article-title>Integrated proteogenomic characterization of human high-grade serous ovarian cancer</article-title>. <source>Cell</source> <volume>166</volume>, <fpage>755</fpage>&#x02013;<lpage>765</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2016.05.069</pub-id><pub-id pub-id-type="pmid">27372738</pub-id></citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>M.</given-names></name> <name><surname>Xiao</surname> <given-names>W.</given-names></name> <name><surname>Chang</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>F.</given-names></name> <name><surname>Xu</surname> <given-names>P.</given-names></name></person-group> (<year>2019</year>). <article-title>Recombinant expression, purification, and characterization of acetylated LysargiNase from <italic>Escherichia coli</italic> with high activity and stability</article-title>. <source>Rapid Commun. Mass Spectrom.</source> <volume>33</volume>, <fpage>1067</fpage>&#x02013;<lpage>1075</lpage>. <pub-id pub-id-type="doi">10.1002/rcm.8440</pub-id><pub-id pub-id-type="pmid">30900783</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>K.</given-names></name> <name><surname>Fu</surname> <given-names>Y.</given-names></name> <name><surname>Zeng</surname> <given-names>W. F.</given-names></name> <name><surname>He</surname> <given-names>K.</given-names></name> <name><surname>Chi</surname> <given-names>H.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>A note on the false discovery rate of novel peptides in proteogenomics</article-title>. <source>Bioinformatics</source> <volume>31</volume>, <fpage>3249</fpage>&#x02013;<lpage>3253</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv340</pub-id><pub-id pub-id-type="pmid">26076724</pub-id></citation></ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Zhao</surname> <given-names>M.</given-names></name> <name><surname>Su</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2016b</year>). <article-title>Quantitative proteomics reveals membrane protein-mediated hypersaline sensitivity and adaptation in halophilic <italic>Nocardiopsis xinjiangensis</italic></article-title>. <source>J. Proteome Res.</source> <volume>15</volume>, <fpage>68</fpage>&#x02013;<lpage>85</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jproteome.5b00526</pub-id><pub-id pub-id-type="pmid">26549328</pub-id></citation></ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>H.</given-names></name> <name><surname>Jiang</surname> <given-names>S.</given-names></name> <name><surname>Zhou</surname> <given-names>W.</given-names></name> <name><surname>Chi</surname> <given-names>H.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name> <name><surname>Shi</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Ac-LysargiNase efficiently helps genome reannotation of <italic>Mycolicibacterium smegmatis</italic> MC<sup>2</sup> 155</article-title>. <source>J. Proteomics</source> <volume>264</volume>, <fpage>104622</fpage>. <pub-id pub-id-type="doi">10.1016/j.jprot.2022.104622</pub-id><pub-id pub-id-type="pmid">35598869</pub-id></citation></ref>
</ref-list> 
</back>
</article> 