<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2021.808354</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A Superior Contiguous Whole Genome Assembly for Shrimp (<italic>Penaeus indicus</italic>)</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Katneni</surname> <given-names>Vinaya Kumar</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/174897/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Shekhar</surname> <given-names>Mudagandur Shashi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1329329/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Jangam</surname> <given-names>Ashok Kumar</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1425301/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Krishnan</surname> <given-names>Karthic</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1090018/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Prabhudas</surname> <given-names>Sudheesh K.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/355938/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Kaikkolante</surname> <given-names>Nimisha</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1139891/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Baghel</surname> <given-names>Dushyant Singh</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/502239/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Koyadan</surname> <given-names>Vijayan K.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Jena</surname> <given-names>Joykrushna</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1119750/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mohapatra</surname> <given-names>Trilochan</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Nutrition Genetics and Biotechnology Division, Indian Council of Agricultural Research-Central Institute of Brackishwater Aquaculture</institution>, <addr-line>Chennai</addr-line>, <country>India</country></aff>
<aff id="aff2"><sup>2</sup><institution>Nucleome Informatics Pvt. Ltd.</institution>, <addr-line>Hyderabad</addr-line>, <country>India</country></aff>
<aff id="aff3"><sup>3</sup><institution>Indian Council of Agricultural Research</institution>, <addr-line>New Delhi</addr-line>, <country>India</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Taewoo Ryu, Okinawa Institute of Science and Technology Graduate University, Japan</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Jianbo Yuan, Institute of Oceanology, Chinese Academy of Sciences (CAS), China; Mengqiang Wang, Ocean University of China, China; Camilla Alves Santos, University of S&#x00E3;o Paulo, Brazil</p></fn>
<corresp id="c001">&#x002A;Correspondence: Vinaya Kumar Katneni, <email>Vinaya.Katneni@icar.gov.in</email></corresp>
<fn fn-type="equal" id="fn002"><p><sup>&#x2020;</sup>These authors have contributed equally to this work</p></fn>
<fn fn-type="other" id="fn004"><p>This article was submitted to Marine Molecular Biology and Ecology, a section of the journal Frontiers in Marine Science</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>01</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>8</volume>
<elocation-id>808354</elocation-id>
<history>
<date date-type="received">
<day>03</day>
<month>11</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>12</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2022 Katneni, Shekhar, Jangam, Krishnan, Prabhudas, Kaikkolante, Baghel, Koyadan, Jena and Mohapatra.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Katneni, Shekhar, Jangam, Krishnan, Prabhudas, Kaikkolante, Baghel, Koyadan, Jena and Mohapatra</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Penaeid shrimp fishery and culture is a commercial enterprise contributing to employment, nutritional security and foreign exchange of developing countries. The genetic improvement programs being operated in shrimp benefit hugely from genomic resources. We report here a high-quality genome assembly for a penaeid shrimp, <italic>Penaeus indicus</italic>, which is the only Crustacean assembly to meet the reference standards of 1 and 10 Mb N50 lengths for contigs and scaffolds, respectively, among genomes of &#x003E;1.5 Gb assembly length. The assembly is 1.93 Gb length (34.4 Mb scaffold N50) with 28,720 protein-coding genes and 49.31% repeat elements. The <italic>P. indicus</italic> assembly has 31.99% of simple sequence repeats, the highest among sequenced animal genomes. In comparison to other shrimp genomes having short contig lengths, the <italic>P. indicus</italic> assembly has 346 un-gapped contigs of over 1 Mb length and betters other shrimp genomes on sequence contiguity. This contiguous genome revealed 15,563 coding single nucleotide polymorphisms (SNPs) of which 2,572 are non-synonymous. The assembly and the SNP data resources have applications to genetic improvement programs, evolutionary studies and stock management.</p>
</abstract>
<kwd-group>
<kwd>Indian white shrimp</kwd>
<kwd>complete genome</kwd>
<kwd>sequence contiguity</kwd>
<kwd><italic>Penaeus indicus</italic></kwd>
<kwd>genome annotation</kwd>
<kwd>coding SNPs</kwd>
</kwd-group>
<contract-sponsor id="cn001">Indian Council of Agricultural Research<named-content content-type="fundref-id">10.13039/501100001503</named-content></contract-sponsor>
<counts>
<fig-count count="4"/>
<table-count count="4"/>
<equation-count count="0"/>
<ref-count count="64"/>
<page-count count="11"/>
<word-count count="7886"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1" sec-type="intro">
<title>Introduction</title>
<p>Farmed shrimp are important contributors of seafood, provide nutritional security, support employment opportunities and are an export commodity earning foreign exchange for many developing countries. About 83% of the 6.55 million tonnes of global farmed shrimp production in 2019 (<xref ref-type="bibr" rid="B10">FAO, 2020</xref>) is contributed by a single species, <italic>Penaeus vannamei</italic>. Though <italic>P. vannamei</italic> is not a native species, several shrimp producing countries are importing the broodstock of this species to breed locally and produce post-larvae required for commercial cultures. Availability of genetically improved and specific pathogen free stocks is the main reason in choosing <italic>P. vannamei</italic> for shrimp production. Such global dependence on a single species is not an ideal scenario for sustainability of shrimp farming industry. There is a need to develop and promote other shrimp species that have natural distribution in shrimp producing countries. For example, <italic>Penaeus indicus</italic> has wide natural distribution in the Indo-West Pacific: East and South East Africa to South China, New Guinea and North Australia (<xref ref-type="bibr" rid="B16">Holthuis et al., 1980</xref>). Development of local shrimp species brings diversity required for sustainability and prevents inter-country disease spread through shrimp movement. The future genetic improvement programs with focus on species like <italic>P. indicus</italic> would benefit global aquaculture with increased productivity and sustainability. The shrimp genetic improvement programs benefit hugely from genomic resources. With the genomics revolution, there is great interest to decipher the whole genome sequence with an aim to integrate genomic information into breeding programs being operated to improve desired economic traits. In this line, we have already developed full-length transcript data for <italic>P. 
indicus</italic>, a valuable resource for functional studies (<xref ref-type="bibr" rid="B23">Katneni et al., 2020</xref>).</p>
<p>A few challenges like large genome size ranging from 2.14 to 2.91 Gb (<xref ref-type="bibr" rid="B50">Swathi et al., 2018</xref>), large number of chromosomes (<xref ref-type="bibr" rid="B6">Chow et al., 1990</xref>), high percentage of repetitive sequences (&#x223C;80%) and high genome heterozygosity (<xref ref-type="bibr" rid="B58">Yu et al., 2015</xref>) might be the reasons for delay in deciphering a shrimp genome till 2019. Also, there was difficulty in preparation of high quality genomic DNA and large-insert bacterial artificial chromosome (BAC) libraries due to presence of mucopolysaccharides, alkaline phosphatase, and other secondary metabolites in shrimp (<xref ref-type="bibr" rid="B63">Zhang et al., 2010</xref>). It was only very recently that the genome assemblies of three shrimp, <italic>P. vannamei</italic> (<xref ref-type="bibr" rid="B62">Zhang X. et al., 2019</xref>; <xref ref-type="bibr" rid="B60">Yuan et al., 2021</xref>), <italic>Penaeus monodon</italic> (<xref ref-type="bibr" rid="B54">Uengwetwanit et al., 2021</xref>), and <italic>Penaeus chinensis</italic> (<xref ref-type="bibr" rid="B60">Yuan et al., 2021</xref>) were successfully reported. The assembly of <italic>P. vannamei</italic> genome illustrated the high repetitive content in shrimp genome and utility of assembly tools like WTDBG (<xref ref-type="bibr" rid="B44">Ruan and Li, 2020</xref>) for such cases. The assembly presented for genome of <italic>P. monodon</italic> has merit in covering &#x003E;90% of full length. All three reported genomes of <italic>P. vannamei</italic>, <italic>P. monodon</italic>, and <italic>P. chinensis</italic> contain shorter contigs with contig N50 values of 58, 79, and 59 Kb, respectively, and some of the assemblies, for example that of <italic>P. vannamei</italic>, were later refined and updated for better parameters (<xref ref-type="bibr" rid="B60">Yuan et al., 2021</xref>). Only the genome assembly of <italic>P. 
monodon</italic> contained un-gapped contigs (<italic>n</italic> = 3) of over 1 Mb length. As reflected from contig N50 lengths, the available shrimp genomes are low in sequence contiguity, which is essential for accurate prediction of repetitive elements and protein-coding gene models in the genome. Benefits of contiguous assembly in repeat/gene annotation and resolution of complex regions/polymorphisms have been demonstrated in animal and plant genomes (<xref ref-type="bibr" rid="B21">Kalbfleisch et al., 2018</xref>; <xref ref-type="bibr" rid="B34">Michael et al., 2018</xref>; <xref ref-type="bibr" rid="B32">Low et al., 2019</xref>; <xref ref-type="bibr" rid="B39">Perumal et al., 2020</xref>). Therefore, taking advantage of the developments in long-read sequencing technologies and assembly algorithms, the present study was conducted with an aim to generate a contiguous genome assembly for <italic>P. indicus</italic> shrimp.</p>
<p>We report here a very high quality genome assembly of <italic>P. indicus</italic> covering 1.93 Gb with contig N50 of 1.4 Mb having very high number of 346 un-gapped contigs of over 1 Mb length and scaffold N50 of 34.4 Mb. Considering only the large genomes of &#x003E;1.5 Gb length, the assembly presented for <italic>P. indicus</italic> is the only Crustacean genome and one among the only nine Invertebrate genomes sequenced so far, to meet the reference standard of 1 Mb contig N50 and 10 Mb scaffold N50 lengths (<xref ref-type="bibr" rid="B42">Reference Standard For Genome Biology, 2018</xref>). The assembly was generated with Pacbio subreads, polished for indels with Illumina paired-end reads and scaffolded with HiC chromatin interaction data. We also report 2,572 high quality, non-synonymous coding single nucleotide polymorphisms (SNPs) identified for the first time in a finished genome assembly of shrimp. The contiguous assembly and the non-synonymous substitution data resources presented here have applications to genetic improvement programs, stock management and ecology and evolutionary studies in species of commercial significance.</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="S2.SS1">
<title>Pacbio Library Preparation and Sequencing</title>
<p>The high molecular weight genomic DNA was isolated from muscle tissue of a single shrimp using QIAGEN genomic-tip 100/G kit (Qiagen, Germany). The size selection was done according to the protocol described under &#x201C;Procedure and Checklist&#x2013;20 Kb template preparation using BluePippin size selection system.&#x201D; The sequencing libraries were prepared using SMRTbell Express Template Prep Kit 2.0 (Pacific Biosciences, United States) and assessed for quality and quantity using the Pippin pulse field inversion gel electrophoresis system (Sage Science, United States). The sequencing was performed on Pacific Biosciences Sequel system using magnetic bead loading and 600-min movies. The Pacbio subreads of 5 Kb and longer were used to generate primary contig-level assembly.</p>
</sec>
<sec id="S2.SS2">
<title>Illumina Sequencing</title>
<p>For short-reads sequence data, 15 paired-end sequencing libraries (5 each with 350, 550, and 650 bp insert size) were prepared using Illumina Truseq Nano DNA Library Prep Kit (Illumina, United States). The PCR enriched libraries were sequenced on Illumina NextSeq500 using 2 &#x00D7; 150 bp chemistry in paired-end mode. The raw reads were trimmed for adapters and poor-quality bases/reads using a sliding window algorithm in paired-end mode as implemented in Trimmomatic V0.36 (<xref ref-type="bibr" rid="B4">Bolger et al., 2014</xref>). The trimmed reads equal to or longer than 75 bases only were further used for polishing (correction of base errors and indels) the contigs in primary genome assembly.</p>
</sec>
<sec id="S2.SS3">
<title>Arima HiC Data Generation</title>
<p>For HiC data, the library was prepared using Arima Hi-C kit (Arima Genomics, United States). Tissue crosslinking and proximity ligation were performed following Arima Hi-C animal tissue protocol. The Illumina sequencing library was prepared from proximally ligated DNA using Swift Accel-NGS 2S plus kit (Swift Biosciences, United States) following manufacturer&#x2019;s guidelines. The library was sequenced on Illumina NovaSeq6000 platform with 150 bp paired-end mode. The HiC reads were used for scaffolding the contigs in primary assembly.</p>
</sec>
<sec id="S2.SS4">
<title>RNA Sequencing</title>
<p>Total RNA was extracted from the gill, hepatopancreas, muscle, pleopod and heart tissues using TRIzol method and utilized for cDNA synthesis. The cDNA sequencing libraries were prepared following the protocol of &#x201C;Sure select strand-specific RNA library prep for Illumina multiplexed sequencing&#x201D; (Agilent Technologies, United States). Sequencing was performed on Illumina NextSeq500 with 2 &#x00D7; 150 bp paired-end chemistry. Similar procedures were followed to generate pooled-RNAseq datasets for gill, hepatopancreas and muscle tissues, wherein, each sample was derived by pooling cDNA from nine different shrimp (three each from Chennai, Kanyakumari, and Puri Coast of India). The Pacific Biosciences Iso-Sequencing data for gills, hepatopancreas, muscle and pooled larvae of <italic>P. indicus</italic> as described previously (<xref ref-type="bibr" rid="B23">Katneni et al., 2020</xref>) along with the RNAseq data were used for genome annotation and the pooled-RNAseq data were used for identification of SNPs in candidate transcripts.</p>
</sec>
<sec id="S2.SS5">
<title>Genome Assembly</title>
<p>The Pacbio subreads with a minimum length of 5 Kb at 73&#x00D7; coverage were used in WTDBG2.5 (<xref ref-type="bibr" rid="B44">Ruan and Li, 2020</xref>) to generate a <italic>de novo</italic> contig-level assembly. To improve the quality of the genome, the contigs were polished in two steps, using Pacbio subreads in first step and Illumina short-reads in the second. In first step, polishing of contigs was performed with the <italic>Arrow</italic> algorithm of variantCaller tool<sup><xref ref-type="fn" rid="footnote1">1</xref></sup> using Pacbio subreads. In the second step, POLCA tool (<xref ref-type="bibr" rid="B64">Zimin and Salzberg, 2020</xref>) was used with Illumina short-reads to correct error bases and indels in contigs. The polished contigs were then scaffolded with 3D-DNA pipeline (<xref ref-type="bibr" rid="B8">Dudchenko et al., 2017</xref>) using HiC reads to generate the final draft assembly.</p>
</sec>
<sec id="S2.SS6">
<title>Assessing Quality of Assembled Genome</title>
<p>The quality of the final assembly was evaluated by aligning the Pacbio subreads and Illumina short-reads on to the genome scaffolds using bwa-mem v0.7.15-r1140 (<xref ref-type="bibr" rid="B29">Li and Durbin, 2009</xref>) and bowtie2 v2.3.4.3 (<xref ref-type="bibr" rid="B27">Langmead and Salzberg, 2012</xref>), respectively. Assembly evaluation based on alignment statistics was also performed with Illumina RNAseq reads and Pacbio IsoSeq transcripts using Hisat2 v2.2.0 (<xref ref-type="bibr" rid="B25">Kim et al., 2019</xref>) and GMAP v2020-06-30 (<xref ref-type="bibr" rid="B57">Wu and Watanabe, 2005</xref>), respectively. In addition, the genome was assessed for completeness by benchmarking against the arthropoda_odb10 (September 10, 2020) dataset of BUSCO single-copy orthologs (<xref ref-type="bibr" rid="B47">Seppey et al., 2019</xref>). Further, the assembly of <italic>P. indicus</italic> was compared with other shrimp genomes for assessment of sequence contiguity based on un-gapped contig length distribution and number of gaps in the finished genomes.</p>
</sec>
<sec id="S2.SS7">
<title>Repeat Annotation and Masking</title>
<p>Homology-based annotation of repeat elements was performed with Penaeidae subset (Taxonomy ID:6685) of RepBase library using the RMBlast search of RepeatMasker (<xref ref-type="bibr" rid="B19">Jurka et al., 2005</xref>)<sup><xref ref-type="fn" rid="footnote2">2</xref></sup> module implemented in OmicsBox v1.3.11 (<xref ref-type="bibr" rid="B3">Bioinformatics, 2019</xref>). For utility in gene prediction, the genome was soft masked for repeat regions with the same procedure excluding the low complexity and simple repeats.</p>
</sec>
<sec id="S2.SS8">
<title>Protein-Coding Gene Prediction and Annotation</title>
<p>Structural annotation of protein-coding regions in <italic>P. indicus</italic> genome was carried out by combining the gene models obtained from short-read RNAseq data, long-read IsoSeq data and proteins from related species (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 1</xref>), with <italic>ab initio</italic> gene predictions. The <italic>ab initio</italic> gene models were predicted on masked genome using Augustus v3.3.3 (<xref ref-type="bibr" rid="B49">Stanke et al., 2006</xref>) and GeneMark-ES v4.59 (<xref ref-type="bibr" rid="B31">Lomsadze et al., 2005</xref>) self training module. While predicting gene models in Augustus, hints generated by aligning IsoSeq data on to the genome using GMAP v2020-06-30 (<xref ref-type="bibr" rid="B57">Wu and Watanabe, 2005</xref>) were also given as input. The PASA v2.4.1 (<xref ref-type="bibr" rid="B13">Haas et al., 2003</xref>) was used to generate valid gene structures from IsoSeq data which utilizes near perfect alignments made by GMAP v2020-06-30 and BLAT v36 (<xref ref-type="bibr" rid="B24">Kent, 2002</xref>; <xref ref-type="bibr" rid="B57">Wu and Watanabe, 2005</xref>) to the genome. Similarly RNAseq data was aligned in a splice aware manner to genome using Hisat2 v2.2.0 (<xref ref-type="bibr" rid="B25">Kim et al., 2019</xref>) and gene models were generated using StringTie v2.1.4 (<xref ref-type="bibr" rid="B38">Pertea et al., 2015</xref>), from which likely coding sequences were identified using TransDecoder v5.5.0.<sup><xref ref-type="fn" rid="footnote3">3</xref></sup> Proteins from the related species were aligned to the genome using GenomeThreader v1.7.3 (<xref ref-type="bibr" rid="B12">Gremme, 2012</xref>) to derive valid gene models. 
All these <italic>ab initio</italic> and evidence based predicted gene models were combined as a weighted matrix in Evidence Modeler (<xref ref-type="bibr" rid="B14">Haas et al., 2008</xref>) to obtain the final set of non-redundant consensus gene models. Homology based annotation was performed using blastx against non-redundant protein database of Genbank and UniProt database to obtain functional description of the genes. The Interproscan and EggNOG Mapper module of OmicsBox v1.3.11 were used to obtain annotations of protein domains and orthology based annotations respectively (<xref ref-type="bibr" rid="B3">Bioinformatics, 2019</xref>). The annotations were merged and the final gene ontology annotations were obtained using OmicsBox v1.3.11. Pathway maps for the annotated genes were generated by mapping against the KEGG database (<xref ref-type="bibr" rid="B22">Kanehisa and Goto, 2000</xref>).</p>
</sec>
<sec id="S2.SS9">
<title>Gene Family and Phylogenetic Analysis</title>
<p>Protein-coding gene sets of 17 species in the phylum, Arthropoda (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 2</xref>) including the <italic>P. indicus</italic> were subjected to gene family analysis using OrthoMCL v2.0.9 (<xref ref-type="bibr" rid="B11">Fischer et al., 2011</xref>). For this analysis, other Metazoan species belonging to the phyla, Mollusca, Chordata, and Echinodermata were also included. The datasets downloaded from NCBI<sup><xref ref-type="fn" rid="footnote4">4</xref></sup> were filtered based on length (minimum 50 amino acids) and selection of the longest isoform. Good protein list from 21 species was subjected to an all-versus-all search using blastp (<xref ref-type="bibr" rid="B1">Altschul et al., 1990</xref>) and then single copy orthologous gene (SCOG) sequences were extracted from orthomcl groups. MUSCLE v3.8.1551 (<xref ref-type="bibr" rid="B9">Edgar, 2004</xref>) was used to generate multiple sequence alignments which were then trimmed using &#x201C;trimAl&#x201D; v1.4 (<xref ref-type="bibr" rid="B5">Capella-Guti&#x00E9;rrez et al., 2009</xref>) tool. The FASconCAT v1.04 (<xref ref-type="bibr" rid="B26">K&#x00FC;ck and Meusemann, 2010</xref>) was used to concatenate the alignments, ProtTest v3.0 (<xref ref-type="bibr" rid="B7">Darriba et al., 2011</xref>) was used to find the best evolutionary model and RAxML v8.2.12 (<xref ref-type="bibr" rid="B48">Stamatakis, 2014</xref>) was used to build Maximum Likelihood tree. Visualization of the tree was done using FigTree v1.4.4 (<xref ref-type="bibr" rid="B41">Rambaut, 2009</xref>).</p>
</sec>
<sec id="S2.SS10">
<title>Variant Calling in Coding Sequences</title>
<p>Pooled-individual RNAseq datasets were trimmed with Trimmomatic v0.39 (<xref ref-type="bibr" rid="B4">Bolger et al., 2014</xref>) to remove poor quality reads and bases. Good quality reads were aligned using TopHat v2.1.1 (<xref ref-type="bibr" rid="B53">Trapnell et al., 2012</xref>) on to the gene sequences indexed with bowtie2 v2.3.4.3 (<xref ref-type="bibr" rid="B27">Langmead and Salzberg, 2012</xref>). The generated bam file was sorted using SAMtools v1.2 (<xref ref-type="bibr" rid="B30">Li et al., 2009</xref>) and then processed in bcftools-1.3.1 (<xref ref-type="bibr" rid="B28">Li, 2011</xref>) to generate variant call format (VCF) file. Those SNPs with a raw read depth of &#x2265;20 at SNP site, at least 10 reads each supporting the reference and alternative alleles and phred quality scores of &#x2265;100 were extracted from VCF file as good quality SNPs. The non-synonymous coding SNPs were analyzed for the possible functional impact on the proteins harboring them, using a standalone version of the PANTHER Coding SNP Analysis tool, PANTHER PSEP v1.01 (<xref ref-type="bibr" rid="B52">Tang and Thomas, 2016</xref>). The tool analyses the SNP by first identifying the Panther family of the protein using blastp (<xref ref-type="bibr" rid="B1">Altschul et al., 1990</xref>), followed by retrieving the ancestor sequence of the family and tracing the query protein through evolution, then reporting how long the SNP position was conserved in millions of years. Finally, based on the predicted values the results are classified as probably benign, possibly damaging or probably damaging if the values are &#x003C;180, &#x2265;180, or &#x2265;380, respectively.</p>
</sec>
</sec>
<sec id="S3" sec-type="results">
<title>Results</title>
<sec id="S3.SS1">
<title>Genome Assembly</title>
<p>Flow cytometry analysis using propidium iodide stained hemocytes indicated a genome size of 2.47 Gb for <italic>P. indicus</italic> (<xref ref-type="bibr" rid="B50">Swathi et al., 2018</xref>). Early attempts made by us to assemble <italic>P. indicus</italic> genome with 145&#x00D7; coverage of Illumina short reads sequence data were unsuccessful (<xref ref-type="supplementary-material" rid="DS2">Supplementary Note</xref>). Various assemblers like SOAPdenovo2 (<xref ref-type="bibr" rid="B33">Luo et al., 2015</xref>), CLC Genomics Workbench v10.0.1 (CLCbio, Denmark), and Platanus v1.2.4 (<xref ref-type="bibr" rid="B20">Kajitani et al., 2014</xref>) produced primary assemblies with large number of contigs. A final assembly reduced with Redundans v0.14a (<xref ref-type="bibr" rid="B40">Pryszcz and Gabald&#x00F3;n, 2016</xref>) contained 358,878 contigs covering about a quarter of genome length (607.78 Mb) with N50 of 1698 bases. Though short-reads could not generate a quality genome, k-mer analysis on them indicated high repetitive nature of <italic>P. indicus</italic> genome thereby suggesting the necessity of long sequence reads to assemble repeat-rich <italic>P. indicus</italic> genome.</p>
<p>About 73&#x00D7; coverage of PacBio Sequel long reads (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 3</xref>) generated using DNA of a <italic>P. indicus</italic> female shrimp were processed in WTDBG2.5 (<xref ref-type="bibr" rid="B44">Ruan and Li, 2020</xref>) to generate primary contigs. These contigs were corrected for error bases/indels in POLCA tool (<xref ref-type="bibr" rid="B64">Zimin and Salzberg, 2020</xref>) using Illumina reads (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 4</xref>). The assembled genome was of 1.98 Gb length and consisted of 12,051 contigs with N50 of 1.4 Mb (<xref ref-type="table" rid="T1">Table 1</xref>). Among large crustacean genomes of more than 1 Gb size, only the genome of <italic>Eriocheir sinensis</italic> (<xref ref-type="bibr" rid="B51">Tang et al., 2020</xref>) has a better contig N50 length (3.16 Mb) than obtained for <italic>P. indicus</italic>. Compared to the other shrimp genomes generated for <italic>P. vannamei</italic> (<xref ref-type="bibr" rid="B62">Zhang X. et al., 2019</xref>; <xref ref-type="bibr" rid="B60">Yuan et al., 2021</xref>), <italic>P. monodon</italic> (<xref ref-type="bibr" rid="B54">Uengwetwanit et al., 2021</xref>), and <italic>P. chinensis</italic> (<xref ref-type="bibr" rid="B60">Yuan et al., 2021</xref>), the assembly obtained for <italic>P. indicus</italic> has 24&#x2013;, 18&#x2013;, and 24-fold improvement, respectively for contig N50 length.</p>
<table-wrap position="float" id="T1">
<label>TABLE 1</label>
<caption><p>Summary statistics for assembled genome of <italic>P. indicus</italic>.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left"></td>
<td valign="top" align="center">Primary WTDBG contigs</td>
<td valign="top" align="center">Polished contigs</td>
<td valign="top" align="center" colspan="3">Scaffold-level<hr/></td>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="center">Scaffolds (&#x003E;5 Mbp length)</td>
<td valign="top" align="center">Scaffolds (&#x003C;5 Mbp length)</td>
<td valign="top" align="center">Total scaffolds</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Number of sequences</td>
<td valign="top" align="center">12,051</td>
<td valign="top" align="center">12,051</td>
<td valign="top" align="center">44</td>
<td valign="top" align="center">11,124</td>
<td valign="top" align="center">11,168</td>
</tr>
<tr>
<td valign="top" align="left">Longest sequence, bp</td>
<td valign="top" align="center">11,941,814</td>
<td valign="top" align="center">11,620,286</td>
<td valign="top" align="center">51,570,475</td>
<td valign="top" align="center">2,166,774</td>
<td valign="top" align="center">51,570,475</td>
</tr>
<tr>
<td valign="top" align="left">Total length, bp</td>
<td valign="top" align="center">1,980,186,105</td>
<td valign="top" align="center">1,931,735,305</td>
<td valign="top" align="center">1,572,756,073</td>
<td valign="top" align="center">362,884,318</td>
<td valign="top" align="center">1,935,640,391</td>
</tr>
<tr>
<td valign="top" align="left">N50 length, bp</td>
<td valign="top" align="center">1,462,103</td>
<td valign="top" align="center">1,417,948</td>
<td valign="top" align="center">38,524,896</td>
<td valign="top" align="center">75,000</td>
<td valign="top" align="center">34,405,730</td>
</tr>
<tr>
<td valign="top" align="left">L50, number</td>
<td valign="top" align="center">388</td>
<td valign="top" align="center">390</td>
<td valign="top" align="center">19</td>
<td valign="top" align="center">1257</td>
<td valign="top" align="center">24</td>
</tr>
<tr>
<td valign="top" align="left">N&#x2019;s per 100 kbp</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">240</td>
<td valign="top" align="center">48</td>
<td valign="top" align="center">204</td>
</tr>
</tbody>
</table></table-wrap>
<p>The scaffolding of polished contigs with 104&#x00D7; coverage (1.7 billion reads/258 Gb) of HiC reads in 3D-DNA (<xref ref-type="bibr" rid="B8">Dudchenko et al., 2017</xref>) resulted in an assembly with 11,168 scaffolds. The final assembly was of 1.935 Gb length (&#x223C;78% of genome) with N50 and longest scaffold length of 34.4 and 51.57 Mb, respectively (<xref ref-type="table" rid="T1">Table 1</xref>). Final assembly obtained for <italic>P. indicus</italic> had 44 scaffolds that were longer than 5 Mb, which equaled the reported haploid chromosome number (<xref ref-type="fig" rid="F1">Figure 1</xref>). These 44 longest scaffolds assumed as pseudochromosomes span about 1.57 Gb length and cover about 81.3% of the assembly (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 5</xref>). The genome length in <italic>P. indicus</italic> that is accounted by the pseudochromosomes alone is almost same as the total genome length presented for <italic>P. vannamei</italic> and <italic>P. chinensis</italic> (<xref ref-type="bibr" rid="B60">Yuan et al., 2021</xref>). The assembly obtained for <italic>P. indicus</italic> genome in the present study is the only shrimp genome that meets the reference standard of 1 Mb contig N50 for primary contigs and 10 Mb scaffold N50 length for final scaffolds (<xref ref-type="fig" rid="F2">Figure 2A</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Assembly of <italic>P. indicus</italic> genome. <bold>(A)</bold> Circos plot depicting the properties of 44 pseudochromosomes. Track 1 (outer-most): the 44 pseudochromosomes. Track 2: line diagram of contigs with peaks indicating the size of contig. Track 3: protein-coding genes plotted as heatmap with progressive colors (yellow&#x2013;orange&#x2013;red) indicating length of genes. Track 4: protein-coding genes on positive strand. Track 5: protein-coding genes on negative strand. Track 6 (innermost): GC content of the genome as a histogram in gray color, here GC values &#x003C;30 are shown in red and &#x003E;40 are shown in blue. <bold>(B)</bold> The longest scaffolds representing the pseudochromosomes as obtained with 3D-DNA tool.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-08-808354-g001.tif"/>
</fig>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p>Quality assessment of assembly generated for <italic>P. indicus</italic> in comparison to the other shrimp assemblies. <bold>(A)</bold> Plot of contig N50 and scaffold N50 lengths of finished large (&#x003E;1.5 Gb) crustacean genomes. The <italic>P. indicus</italic> is the only large crustacean genome assembly that has &#x003E;1 Mb contig N50 and &#x003E;10 Mb scaffold N50 lengths. <bold>(B)</bold> Un-gapped contig length distribution for finished shrimp genomes. Only the genome assembly of <italic>P. indicus</italic> has contigs longer than 1 Mb length. <bold>(C)</bold> Plot of the number of gaps (log<sub>10</sub> scale) in the finished genomes of shrimp species. The <italic>X</italic>-axis is the cumulative genome size by counting the individual scaffold lengths in decreasing order. The <italic>Y</italic>-axis is the total number of gaps within the scaffolds contributing to the corresponding genome size. Notice that the assembly of <italic>P. indicus</italic> has less number of N&#x2019;s than others. <bold>(D)</bold> Violin plot of number of N&#x2019;s in gene sequences from finished genomes of shrimp plotted against the corresponding gene lengths (log<sub>10</sub> scale). Notice that the assembly of <italic>P. indicus</italic> has no N&#x2019;s whereas the assembly of <italic>P. monodon</italic> has highest number of N&#x2019;s in gene sequences.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-08-808354-g002.tif"/>
</fig>
</sec>
<sec id="S3.SS2">
<title>Genome Assembly Validation</title>
<p>Benchmarking of the <italic>P. indicus</italic> genome with arthropoda_odb10 (September 10, 2020) BUSCO gene groups (<italic>n</italic> = 1013) indicated the presence of 77% of complete orthologs in the genome. About 3.5% of BUSCO orthologs were fragmented and the remaining 19.5% were missing. Against the same dataset, the <italic>P. vannamei</italic> genome (<xref ref-type="bibr" rid="B62">Zhang X. et al., 2019</xref>) had 76% complete, 4.1% fragmented, and 19.6% missing BUSCO orthologs. In comparison, the proportion of missing genes reduced to 14.4% if odb9 dataset (<italic>n</italic> = 1066) was used for benchmarking. On the other hand, the <italic>P. indicus</italic> genome when assessed with gVolante 1.2.1 (<xref ref-type="bibr" rid="B35">Nishimura et al., 2017</xref>) against CEG ortholog set in CEGMA pipeline (<xref ref-type="bibr" rid="B36">Parra et al., 2007</xref>), has a completeness score of 98.39%. We observed that the completeness scores changed with the orthologous gene set and the data release associated with it. We suggest exercising caution to draw conclusions while directly comparing quality of genomes based on different gene sets. Moreover, with the shrinkage of orthologous gene datasets due to increased number of sequenced genomes, we opine that their usage soon becomes debatable.</p>
<p>Read mapping statistics indicated high quality of the assembly with high percentage of reads aligning on to the genome (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 6</xref>). For DNA data, about 98.89% of Pacbio reads and 95.4% of Illumina reads could be aligned back to the genome scaffolds. For RNA-based data, about 94.29% of RNAseq reads and 99.25% of Pacbio IsoSeq transcripts could be mapped on to the genome. High quality of the <italic>P. indicus</italic> genome assembly presented in this study has been demonstrated with high mapping statistics and BUSCO completeness scores as comparable to other shrimp genomes.</p>
<p>The sequence contiguity assessed based on un-gapped contig lengths and the number of gaps in the assembly is another quality metric that we used in this study to assess the quality of the <italic>P. indicus</italic> assembly in relation to other shrimp genomes. A distribution of un-gapped contig lengths (<xref ref-type="fig" rid="F2">Figure 2B</xref>) indicated that <italic>P. monodon</italic> is the only shrimp genome other than that of <italic>P. indicus</italic> that contained un-gapped contigs (<italic>n</italic> = 3) of over 1 Mb length. The <italic>P. indicus</italic> assembly has 346 un-gapped contigs of 1 Mb or greater length. Again, a plot of the number of gaps (<xref ref-type="fig" rid="F2">Figure 2C</xref>) in the finished assembly also places the <italic>P. indicus</italic> genome above other shrimp genomes in assembly quality. Interestingly, the chromosome-scale assembly presented for <italic>P. monodon</italic> was found to have a high number of gaps in the finished assembly. As observed in <xref ref-type="fig" rid="F2">Figure 2C</xref>, the intermittent elevations in the <italic>P. monodon</italic> line plot indicate the presence of a high number of gaps in some scaffolds. The coding sequences were also observed to contain N&#x2019;s in the <italic>P. monodon</italic> assembly while none existed in the <italic>P. indicus</italic> assembly (<xref ref-type="fig" rid="F2">Figure 2D</xref>).</p>
</sec>
<sec id="S3.SS3">
<title>Repeat Content</title>
<p>The repeat elements as derived on the basis of the number of bases masked, constituted 49.31% (954 Mb) of the assembled genome (<xref ref-type="table" rid="T2">Table 2</xref>). One of the prominent features was a high proportion of simple sequence repeats (SSR) which spanned 31.99% of the genome. The proportion of SSRs reported here for <italic>P. indicus</italic> was found to be the highest amongst all sequenced genomes in the animal kingdom. The role of SSRs in adaptive evolution was recently demonstrated for shrimp (<xref ref-type="bibr" rid="B60">Yuan et al., 2021</xref>). Other major repeat classes include LINEs (5.8%) and low complexity regions (4.57%). The satellites (1.36%), LTR elements (0.31%), DNA transposons (0.2%), and small RNA (0.07%) were the other minor repeat families observed in the <italic>P. indicus</italic> genome assembly.</p>
<table-wrap position="float" id="T2">
<label>TABLE 2</label>
<caption><p>Repeat profile in assembled <italic>P. indicus</italic> genome.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<tbody>
<tr>
<td valign="top" align="left">Assembled genome size</td>
<td valign="top" align="center" colspan="3">1,935,640,391 bp (1,931,735,305 bp excluding N)</td>
</tr>
<tr>
<td valign="top" align="left">Total no. of scaffolds</td>
<td valign="top" align="center" colspan="3">11,168</td>
</tr>
<tr>
<td valign="top" align="left">GC content (%)</td>
<td valign="top" align="center" colspan="3">35.58%</td>
</tr>
<tr>
<td valign="top" align="left">Bases masked</td>
<td valign="top" align="center" colspan="3">954,483,365 bp (49.31%)</td>
</tr>
<tr>
<td valign="top" align="center" colspan="4"><hr/></td>
</tr>
<tr>
<td valign="top" align="left" colspan="4"><bold>Repeat profile</bold><hr/></td>
</tr>
<tr>
<td valign="top" align="left"><bold>Repeat class/family</bold></td>
<td valign="top" align="center"><bold>Number of elements</bold></td>
<td valign="top" align="center"><bold>Length occupied</bold></td>
<td valign="top" align="center"><bold>Percentage of sequence</bold></td>
</tr>
<tr>
<td valign="top" align="center" colspan="4"><hr/></td>
</tr>
<tr>
<td valign="top" align="left"><bold>SINEs</bold></td>
<td valign="top" align="center"><bold>6,867</bold></td>
<td valign="top" align="center"><bold>1,037,080</bold></td>
<td valign="top" align="center"><bold>0.05</bold></td>
</tr>
<tr>
<td valign="top" align="left"><bold>LINEs</bold></td>
<td valign="top" align="center"><bold>460,164</bold></td>
<td valign="top" align="center"><bold>112,349,233</bold></td>
<td valign="top" align="center"><bold>5.80</bold></td>
</tr>
<tr>
<td valign="top" align="left">Penelope</td>
<td valign="top" align="center">50,579</td>
<td valign="top" align="center">12,558,290</td>
<td valign="top" align="center">0.65</td>
</tr>
<tr>
<td valign="top" align="left">L2/CR1/Rex</td>
<td valign="top" align="center">6,014</td>
<td valign="top" align="center">5,448,412</td>
<td valign="top" align="center">0.28</td>
</tr>
<tr>
<td valign="top" align="left">R1/LOA/Jockey</td>
<td valign="top" align="center">29,806</td>
<td valign="top" align="center">27,471,836</td>
<td valign="top" align="center">1.42</td>
</tr>
<tr>
<td valign="top" align="left">RTE/Bov-B</td>
<td valign="top" align="center">343,621</td>
<td valign="top" align="center">62,070,348</td>
<td valign="top" align="center">3.21</td>
</tr>
<tr>
<td valign="top" align="left"><bold>LTR elements</bold></td>
<td valign="top" align="center"><bold>8,709</bold></td>
<td valign="top" align="center"><bold>6,093,826</bold></td>
<td valign="top" align="center"><bold>0.31</bold></td>
</tr>
<tr>
<td valign="top" align="left">BEL/Pao</td>
<td valign="top" align="center">181</td>
<td valign="top" align="center">162,287</td>
<td valign="top" align="center">0.01</td>
</tr>
<tr>
<td valign="top" align="left">Gypsy/DIRS1</td>
<td valign="top" align="center">8,516</td>
<td valign="top" align="center">5,923,190</td>
<td valign="top" align="center">0.31</td>
</tr>
<tr>
<td valign="top" align="left"><bold>DNA transposons</bold></td>
<td valign="top" align="center"><bold>15,164</bold></td>
<td valign="top" align="center"><bold>3,950,139</bold></td>
<td valign="top" align="center"><bold>0.20</bold></td>
</tr>
<tr>
<td valign="top" align="left">Tc1-IS630-Pogo</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">776</td>
<td valign="top" align="center">0.00</td>
</tr>
<tr>
<td valign="top" align="left">PiggyBac</td>
<td valign="top" align="center">180</td>
<td valign="top" align="center">81,946</td>
<td valign="top" align="center">0.00</td>
</tr>
<tr>
<td valign="top" align="left">Tourist/Harbinger</td>
<td valign="top" align="center">398</td>
<td valign="top" align="center">72,517</td>
<td valign="top" align="center">0.00</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Unclassified</bold></td>
<td valign="top" align="center"><bold>445,043</bold></td>
<td valign="top" align="center"><bold>103,924,584</bold></td>
<td valign="top" align="center"><bold>5.37</bold></td>
</tr>
<tr>
<td valign="top" align="left"><bold>Small RNA</bold></td>
<td valign="top" align="center"><bold>8,598</bold></td>
<td valign="top" align="center"><bold>1,280,010</bold></td>
<td valign="top" align="center"><bold>0.07</bold></td>
</tr>
<tr>
<td valign="top" align="left"><bold>Satellites</bold></td>
<td valign="top" align="center"><bold>73,550</bold></td>
<td valign="top" align="center"><bold>26,392,782</bold></td>
<td valign="top" align="center"><bold>1.36</bold></td>
</tr>
<tr>
<td valign="top" align="left"><bold>Simple repeats</bold></td>
<td valign="top" align="center"><bold>3,754,013</bold></td>
<td valign="top" align="center"><bold>619,134,792</bold></td>
<td valign="top" align="center"><bold>31.99</bold></td>
</tr>
<tr>
<td valign="top" align="left"><bold>Low complexity</bold></td>
<td valign="top" align="center"><bold>496,765</bold></td>
<td valign="top" align="center"><bold>88,405,481</bold></td>
<td valign="top" align="center"><bold>4.57</bold></td>
</tr>
</tbody>
</table></table-wrap>
</sec>
<sec id="S3.SS4">
<title>Gene Prediction and Annotation</title>
<p>Combined evidence from <italic>ab initio</italic> gene prediction, Illumina RNAseq data, Pacbio Iso-Sequencing data and protein sequences from related species, identified 28,720 protein-coding genes in the <italic>P. indicus</italic> genome (<xref ref-type="fig" rid="F3">Figure 3</xref>). The predicted protein-coding gene number was higher than <italic>P. chinensis</italic> (26,343) and <italic>P. vannamei</italic> (25,596) genomes but lower when compared to <italic>P. monodon</italic> (30,038) genome. The mean exon and intron lengths were 259 and 2315 bp, respectively. The longest gene, exon and intron lengths were 98,168, 14,941, and 76,392 bp, respectively (<xref ref-type="table" rid="T3">Table 3</xref>). Overall, 81.79% of the predicted genes had evidence from RNAseq data or IsoSeq data or proteins from related species. Functional annotations yielded results for 98.36% of the predicted genes using Interproscan, non-redundant protein database of Genbank and the UniProt database. For majority of the genes, the <italic>P. vannamei</italic> was the top hit species showing homology (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 1</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p>Workflow followed for predicting protein-coding genes in assembly of <italic>P. indicus</italic> genome.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-08-808354-g003.tif"/>
</fig>
<table-wrap position="float" id="T3">
<label>TABLE 3</label>
<caption><p>Properties of predicted genes in <italic>P. indicus</italic> genome.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<tbody>
<tr>
<td valign="top" align="left">Total sequence length, bp</td>
<td valign="top" align="center">1,935,640,391</td>
</tr>
<tr>
<td valign="top" align="left">Number of genes</td>
<td valign="top" align="center">28,720</td>
</tr>
<tr>
<td valign="top" align="left">Total gene length, bp</td>
<td valign="top" align="center">459,774,562</td>
</tr>
<tr>
<td valign="top" align="left">Longest gene, bp</td>
<td valign="top" align="center">98,168</td>
</tr>
<tr>
<td valign="top" align="left">Longest exon, bp</td>
<td valign="top" align="center">14,941</td>
</tr>
<tr>
<td valign="top" align="left">Longest intron, bp</td>
<td valign="top" align="center">76,392</td>
</tr>
<tr>
<td valign="top" align="left">mean gene length, bp</td>
<td valign="top" align="center">16,009</td>
</tr>
<tr>
<td valign="top" align="left">mean exon length, bp</td>
<td valign="top" align="center">259</td>
</tr>
<tr>
<td valign="top" align="left">mean intron length, bp</td>
<td valign="top" align="center">2,315</td>
</tr>
<tr>
<td valign="top" align="left">% of genome covered by CDS</td>
<td valign="top" align="center">2.7</td>
</tr>
<tr>
<td valign="top" align="left">mean exons per mRNA</td>
<td valign="top" align="center">7</td>
</tr>
</tbody>
</table></table-wrap>
</sec>
<sec id="S3.SS5">
<title>Gene Family Analyses and Phylogenetic Relations</title>
<p>The gene family analyses with protein sequences of 21 species including <italic>P. indicus</italic> identified 148 single-copy orthologous genes amongst them. Out of 399,313 genes subjected to the analyses, 81.75% (326,455) were clustered into 35,611 orthogroups and 18.25% (72,858) were singletons (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 2</xref>). About 1,504 orthogroups were shared by all the 21 species. In <italic>P. indicus</italic>, of the 9595 orthologous gene families, maximum number were found sharing with <italic>P. monodon</italic> (8387) followed by <italic>P. vannamei</italic> (8255) and <italic>P. chinensis</italic> (7928). We found that 6,722 orthologous gene families were shared among the four shrimp species and 1,987 gene families only among them. The phylogenetic tree generated with sequences of single-copy orthologous genes depicted three distinct clades representative of Chelicerates, Crustaceans, and Hexapods (<xref ref-type="fig" rid="F4">Figure 4</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p>Phylogenetic placement of <italic>Penaeus indicus</italic> among Arthropods. Numbers at the nodes are bootstrap values and those given in the parenthesis indicate number of gene families that were expanding (green), rapidly evolving (blue), and contracting (red).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-08-808354-g004.tif"/>
</fig>
</sec>
<sec id="S3.SS6">
<title>Coding Single Nucleotide Polymorphisms</title>
<p>Exploiting the pooled-sample RNA sequencing approach, the study reports 15,554 coding SNPs present in 3,965 different protein-coding genes in <italic>P. indicus</italic> genome (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 7</xref>). Minimum raw-read depth of 20, minimum evidence of 10 reads for each of the alleles and minimum phred quality of 100 were the criteria followed for short-listing the good quality SNPs. About one-third of the genes (<italic>n</italic> = 1,185) had only one SNP and 283 genes had &#x2265;10 SNP positions (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 3</xref>). The majority of the SNPs were transversions (<italic>n</italic> = 13,655) rather than transitions (<italic>n</italic> = 1,899). Of the 15,554 SNPs, only 2,571, situated in 1,262 unique gene sequences, were non-synonymous in nature, contributing to amino acid polymorphic sites in the resulting proteins. Among the non-synonymous SNPs, the majority were observed to be transversions (<italic>n</italic> = 2,038) rather than transitions (<italic>n</italic> = 533). Of the total SNPs, 28% of transition and 15% of transversion substitutions were observed to be non-synonymous in nature (<xref ref-type="table" rid="T4">Table 4</xref>). As observed, a transition substitution is more likely to be non-synonymous than a transversion. The PANTHER PSEP v1.01 (<xref ref-type="bibr" rid="B52">Tang and Thomas, 2016</xref>) tool classified 76 of these non-synonymous SNPs into probably benign (19), possibly damaging (11), and probably damaging (46) as shown in <xref ref-type="supplementary-material" rid="DS2">Supplementary Table 8</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 4</xref>. The tool was unable to determine the score for the remaining non-synonymous SNPs either due to a mismatch of the amino acid at the mentioned position between the query sequence and the panther family sequence or the absence of the panther family to the given query sequence.</p>
<table-wrap position="float" id="T4">
<label>TABLE 4</label>
<caption><p>Single nucleotide polymorphisms (SNPs) present in protein-coding genes of <italic>P. indicus</italic>.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left"></td>
<td valign="top" align="center">Synonymous</td>
<td valign="top" align="center">Non-synonymous</td>
<td valign="top" align="center">Total</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Transition</td>
<td valign="top" align="center">1,366</td>
<td valign="top" align="center">533</td>
<td valign="top" align="center"><bold>1,899</bold></td>
</tr>
<tr>
<td valign="top" align="left">Transversion</td>
<td valign="top" align="center">11,617</td>
<td valign="top" align="center">2,038</td>
<td valign="top" align="center"><bold>13,655</bold></td>
</tr>
<tr>
<td valign="top" align="left"><bold>Total</bold></td>
<td valign="top" align="center"><bold>12,983</bold></td>
<td valign="top" align="center"><bold>2,571</bold></td>
<td valign="top" align="center"><bold>15,554</bold></td>
</tr>
</tbody>
</table></table-wrap>
</sec>
</sec>
<sec id="S4" sec-type="discussion">
<title>Discussion</title>
<p>One of the foremost requirements suggested for benchmarking genome assemblies (<xref ref-type="bibr" rid="B42">Reference Standard For Genome Biology, 2018</xref>) is to have an N50 size of at least 1 Mb for contigs and 10 Mb for scaffolds, in addition to other quality metrics concerning base error rates, structural variants and chromosome level phased assembly. It is interesting to observe that very few of the available Crustacean genomes satisfy this benchmark based on N50 statistics (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 9</xref>). These include the genomes assembled for <italic>P. indicus</italic> (1.93 Gb, this study), <italic>E. sinensis</italic> (1.27 Gb), <italic>Lepeophtheirus salmonis</italic> (0.67 Gb), and <italic>Eulimnadia texana</italic> (0.12 Gb). The assembly presented for <italic>P. indicus</italic> in this study is the largest Crustacean genome to date to meet these quality standards. There are other genomes of crustaceans that are superior to the <italic>P. indicus</italic> assembly in terms of the scaffold N50 metric, but no other large Crustacean genome (of &#x003E;1.5 Gbp) has a contig N50 of &#x003E;1 Mb, which is the minimum requirement as per the suggested standards. A combination of primary assembly with Pacbio Sequel subreads, error correction with high quality Illumina reads and scaffolding with Arima HiC reads resulted in the most contiguous assembly reported so far for a shrimp species. Considering only the large genomes with assembly length of &#x003E;1.5 Gb, the <italic>P. indicus</italic> assembly is one of only nine Invertebrate genomes sequenced so far to meet the reference standard of 1 Mb contig N50 and 10 Mb scaffold N50 lengths (<xref ref-type="supplementary-material" rid="DS2">Supplementary Tables 10</xref>, <xref ref-type="supplementary-material" rid="DS2">11</xref>).</p>
<p>Previous attempts to assemble genome with short reads in other shrimp species such as <italic>P. monodon</italic> (<xref ref-type="bibr" rid="B59">Yuan et al., 2018</xref>; <xref ref-type="bibr" rid="B55">Van Quyen et al., 2020</xref>) and <italic>Marsupenaeus japonicus</italic> (<xref ref-type="bibr" rid="B59">Yuan et al., 2018</xref>) also produced a highly fragmented assembly like that of <italic>P. indicus</italic>. For example, the genome of <italic>P. monodon</italic> consisting of over a million scaffolds with N50 of 1756 bp and covering just above 60% of genome length was reported to have a BUSCO score of 96.8% (<xref ref-type="bibr" rid="B55">Van Quyen et al., 2020</xref>). Recently, the chromosome scale genome assembly (92% coverage and N50 of 44.86 Mb) presented for <italic>P. monodon</italic> has 94.7% BUSCO score using Eukaryota odb9 dataset which is lower than the score obtained for a highly fragmented assembly. Similarly for <italic>P. vannamei</italic> assembly (<xref ref-type="bibr" rid="B62">Zhang X. et al., 2019</xref>), the missing BUSCO orthologs were 19.6% when benchmarked against arthropoda_odb10 whereas the missing orthologs were only 5.2% if arthropoda_odb9 dataset was used (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 5</xref>). Therefore, we suggest not emphasizing BUSCO completeness scores for fragmented assemblies. Similar opinion was expressed while comparing the latest chromosome scale assembly of water buffalo genome against the previous highly fragmented assembly (<xref ref-type="bibr" rid="B32">Low et al., 2019</xref>).</p>
<p>The repeat content in the four shrimp genomes (including that of <italic>P. indicus</italic> in this study) assembled so far ranged from 48.58 to 62.50% (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 12</xref>). It can be firmly established that shrimp genomes are characterized by a high proportion of simple repeats whose origin and role remain intriguing. The genomes of <italic>P. chinensis</italic> and <italic>P. vannamei</italic> have a higher proportion of DNA transposons and low complexity repeats than other shrimp. In contrast, <italic>P. monodon</italic> contains more genome length spanning SINEs, LINEs, LTR elements and unclassified repeats compared to other shrimp genomes. As the assembly length varies (1.58&#x2013;2.39 Gb) among shrimp genomes, a comparison in terms of number of bases would be more appropriate rather than on proportions. Among shrimp, though the repeat content varied between 768 and 1498 Mb, the non-repeat portion of the genome remained uniform between 813 and 981 Mb. The genome of <italic>P. monodon</italic> with the largest assembly length also has higher repeat length. The added evidence about presence of higher orthologous genes content in 1.66 and 1.58 Gb length of <italic>P. vannamei</italic> and <italic>P. chinensis</italic> genomes, respectively, might indicate a higher proportion of repeat elements in the unassembled portion of these genomes. Nevertheless, assessing on proportion of assembled length or on actual base count, the <italic>P. indicus</italic> genome has the highest length of simple repeats among shrimp genomes. The high SSRs in the genome of <italic>P. indicus</italic> may be attributed to the sequence contiguity as shorter repeats get resolved in longer contigs. It is fascinating to observe high SSR content within the coding genes of <italic>P. indicus</italic> in comparison to other shrimp genomes (<xref ref-type="supplementary-material" rid="DS2">Supplementary Table 13</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 6</xref>). The SSR spans about 7.56% of coding sequences in <italic>P. indicus</italic> as against 1.12&#x2013;2.29% in other shrimp. The demonstrated role of SSRs in genomic plasticity of <italic>P. vannamei</italic> and <italic>P. chinensis</italic> shrimp (<xref ref-type="bibr" rid="B60">Yuan et al., 2021</xref>) would suggest the influence of high SSR on certain species-specific adaptive functions of <italic>P. indicus</italic>, which needs to be explored.</p>
<p>In shrimp, the role of SNPs was demonstrated for use in construction of linkage maps (<xref ref-type="bibr" rid="B2">Baranski et al., 2014</xref>; <xref ref-type="bibr" rid="B58">Yu et al., 2015</xref>; <xref ref-type="bibr" rid="B18">Jones et al., 2017</xref>), trait-specific association studies (<xref ref-type="bibr" rid="B43">Robinson et al., 2014</xref>; <xref ref-type="bibr" rid="B45">Santos et al., 2018</xref>; <xref ref-type="bibr" rid="B61">Zhang Q. et al., 2019</xref>; <xref ref-type="bibr" rid="B17">Janpoom et al., 2020</xref>), genetic characterization (<xref ref-type="bibr" rid="B37">Perez-Enriquez et al., 2018</xref>; <xref ref-type="bibr" rid="B56">Vu et al., 2020</xref>) and parentage testing (<xref ref-type="bibr" rid="B15">Henshall et al., 2014</xref>; <xref ref-type="bibr" rid="B46">Sellars et al., 2014</xref>). In this study, we report 2,571 non-synonymous SNPs, of which 46 might have potential to impact the functions of coding proteins. Earlier in <italic>P. monodon</italic>, the majority of coding SNPs identified through pooled-sequencing approach proved to be real polymorphic sites and useful for QTL finding (<xref ref-type="bibr" rid="B43">Robinson et al., 2014</xref>). Therefore, we believe that the SNPs identified in this study with further stringent criteria would be real SNP sites with potential applications in genome-wide association studies.</p>
</sec>
<sec id="S5" sec-type="conclusion">
<title>Conclusion</title>
<p>We report the assembly of <italic>P. indicus</italic> genome which is the largest Crustacean genome assembly reported so far to meet the 1 Mb contig N50 and 10 Mb scaffold N50 quality metrics. The protein-coding gene prediction strategy followed in the current study which combines evidence from RNAseq, IsoSeq, <italic>ab initio</italic> methods and proteins from related species, has general application to other genomes. The contiguous assembly presented here would serve as reference for future genome-guided assemblies. Continuous improvements in sequencing technologies and bioinformatics approaches shall lead to a better understanding of abundant repetitive sequences especially of SSRs in shrimp genomes. The identified non-synonymous SNPs would be a valuable resource to construct custom genotyping panels useful in genome-wide association studies.</p>
</sec>
<sec id="S6" sec-type="data-availability">
<title>Data Availability Statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/">https://www.ncbi.nlm.nih.gov/</ext-link>, <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA495105">PRJNA495105</ext-link>; <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/">https://www.ncbi.nlm.nih.gov/</ext-link>, <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA494937">PRJNA494937</ext-link>; <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ena">https://www.ebi.ac.uk/ena</ext-link>, <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJEB38936">PRJEB38936</ext-link>; <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/genbank/">https://www.ncbi.nlm.nih.gov/genbank/</ext-link>, <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="JAGYIC000000000">JAGYIC000000000</ext-link>; <ext-link ext-link-type="uri" xlink:href="https://figshare.com/">https://figshare.com/</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.6084/m9.figshare.14806308.v2">https://doi.org/10.6084/m9.figshare.14806308.v2</ext-link>.</p>
</sec>
<sec id="S7">
<title>Author Contributions</title>
<p>TM, JJ, VKo, and MS conceived and designed the study. DB generated the sequence data. VKa, AJ, KK, SP, and NK performed the genome assembly, repeat masking, genome annotation, and cSNP identification. AJ and NK performed the gene family analyses and phylogenetic analyses. MS and VKa wrote the manuscript with inputs from all other authors. All authors have reviewed the manuscript and accepted the final version.</p>
</sec>
<sec id="conf1" sec-type="COI-statement">
<title>Conflict of Interest</title>
<p>DB is employed by Nucleome Informatics Pvt. Ltd., Hyderabad, India. The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="pudiscl1" sec-type="disclaimer">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<sec id="S8" sec-type="funding-information">
<title>Funding</title>
<p>The work is funded by the ICAR-CRP on Genomics, Indian Council of Agricultural Research, New Delhi, India.</p>
</sec>
<sec id="S9" sec-type="supplementary-material"><title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmars.2021.808354/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmars.2021.808354/full#supplementary-material</ext-link>.</p>
<supplementary-material xlink:href="Data_Sheet_1.docx" id="DS1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Data_Sheet_2.docx" id="DS2" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Data_Sheet_3.xlsx" id="DS3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altschul</surname> <given-names>S. F.</given-names></name> <name><surname>Gish</surname> <given-names>W.</given-names></name> <name><surname>Miller</surname> <given-names>W.</given-names></name> <name><surname>Myers</surname> <given-names>E. W.</given-names></name> <name><surname>Lipman</surname> <given-names>D. J.</given-names></name></person-group> (<year>1990</year>). <article-title>Basic local alignment search tool.</article-title> <source><italic>J. Mol. Biol.</italic></source> <volume>215</volume> <fpage>403</fpage>&#x2013;<lpage>410</lpage>. <pub-id pub-id-type="doi">10.1016/S0022-2836(05)80360-2</pub-id></citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baranski</surname> <given-names>M.</given-names></name> <name><surname>Gopikrishna</surname> <given-names>G.</given-names></name> <name><surname>Robinson</surname> <given-names>N. A.</given-names></name> <name><surname>Katneni</surname> <given-names>V. K.</given-names></name> <name><surname>Shekhar</surname> <given-names>M. S.</given-names></name> <name><surname>Shanmugakarthik</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>The development of a high density linkage map for black tiger shrimp (Penaeus monodon) based on cSNPs.</article-title> <source><italic>PLoS One</italic></source> <volume>9</volume>:<issue>e85413</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0085413</pub-id> <pub-id pub-id-type="pmid">24465553</pub-id></citation></ref>
<ref id="B3"><citation citation-type="journal"><collab>BioBam Bioinformatics</collab> (<year>2019</year>). <source><italic>OmicsBox-Bioinformatics made easy (Version 1.3.3).</italic></source></citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bolger</surname> <given-names>A. M.</given-names></name> <name><surname>Lohse</surname> <given-names>M.</given-names></name> <name><surname>Usadel</surname> <given-names>B.</given-names></name></person-group> (<year>2014</year>). <article-title>Trimmomatic: a flexible trimmer for Illumina sequence data.</article-title> <source><italic>Bioinformatics</italic></source> <volume>30</volume> <fpage>2114</fpage>&#x2013;<lpage>2120</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu170</pub-id> <pub-id pub-id-type="pmid">24695404</pub-id></citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Capella-Guti&#x00E9;rrez</surname> <given-names>S.</given-names></name> <name><surname>Silla-Mart&#x00ED;nez</surname> <given-names>J. M.</given-names></name> <name><surname>Gabald&#x00F3;n</surname> <given-names>T.</given-names></name></person-group> (<year>2009</year>). <article-title>trimAl: A tool for automated alignment trimming in large-scale phylogenetic analyses.</article-title> <source><italic>Bioinformatics</italic></source> <volume>25</volume> <fpage>1972</fpage>&#x2013;<lpage>1973</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp348</pub-id> <pub-id pub-id-type="pmid">19505945</pub-id></citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chow</surname> <given-names>S.</given-names></name> <name><surname>Dougherty</surname> <given-names>W. J.</given-names></name> <name><surname>Sandifer</surname> <given-names>P. A.</given-names></name></person-group> (<year>1990</year>). <article-title>Meiotic chromosome complements and nuclear DNA contents of four species of shrimps of the genus Penaeus.</article-title> <source><italic>J. Crustac. Biol.</italic></source> <volume>10</volume> <fpage>29</fpage>&#x2013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1163/193724090X00221</pub-id></citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Darriba</surname> <given-names>D.</given-names></name> <name><surname>Taboada</surname> <given-names>G. L.</given-names></name> <name><surname>Doallo</surname> <given-names>R.</given-names></name> <name><surname>Posada</surname> <given-names>D.</given-names></name></person-group> (<year>2011</year>). <article-title>ProtTest 3: Fast selection of best-fit models of protein evolution.</article-title> <source><italic>Bioinformatics</italic></source> <volume>27</volume> <fpage>1164</fpage>&#x2013;<lpage>1165</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr088</pub-id> <pub-id pub-id-type="pmid">21335321</pub-id></citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dudchenko</surname> <given-names>O.</given-names></name> <name><surname>Batra</surname> <given-names>S. S.</given-names></name> <name><surname>Omer</surname> <given-names>A. D.</given-names></name> <name><surname>Nyquist</surname> <given-names>S. K.</given-names></name> <name><surname>Hoeger</surname> <given-names>M.</given-names></name> <name><surname>Durand</surname> <given-names>N. C.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>De novo assembly of the Aedes aegypti genome using Hi-C yields chromosome-length scaffolds.</article-title> <source><italic>Science</italic></source> <volume>356</volume> <fpage>92</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1126/science.aal3327</pub-id> <pub-id pub-id-type="pmid">28336562</pub-id></citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Edgar</surname> <given-names>R. C.</given-names></name></person-group> (<year>2004</year>). <article-title>MUSCLE: Multiple sequence alignment with high accuracy and high throughput.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>32</volume> <fpage>1792</fpage>&#x2013;<lpage>1797</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkh340</pub-id> <pub-id pub-id-type="pmid">15034147</pub-id></citation></ref>
<ref id="B10"><citation citation-type="journal"><collab>FAO</collab> (<year>2020</year>). <source><italic>Food Agric. Organ. United Nations.</italic></source> <ext-link ext-link-type="uri" xlink:href="http://www.fao.org/">http://www.fao.org/</ext-link></citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fischer</surname> <given-names>S.</given-names></name> <name><surname>Brunk</surname> <given-names>B. P.</given-names></name> <name><surname>Chen</surname> <given-names>F.</given-names></name> <name><surname>Gao</surname> <given-names>X.</given-names></name> <name><surname>Harb</surname> <given-names>O. S.</given-names></name> <name><surname>Iodice</surname> <given-names>J. B.</given-names></name><etal/></person-group> (<year>2011</year>). <article-title>Using OrthoMCL to assign proteins to OrthoMCL-DB groups or to cluster proteomes into new ortholog groups.</article-title> <source><italic>Curr. Protoc. Bioinforma.</italic></source> <volume>2011</volume> <fpage>1</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1002/0471250953.bi0612s35</pub-id> <pub-id pub-id-type="pmid">21901743</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gremme</surname> <given-names>G.</given-names></name></person-group> (<year>2012</year>). <source><italic>Computational gene structure prediction.</italic></source> <comment>Dissertation</comment>. <publisher-loc>Hamburg</publisher-loc>: <publisher-name>University of Hamburg</publisher-name>.</citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haas</surname> <given-names>B. J.</given-names></name> <name><surname>Delcher</surname> <given-names>A. L.</given-names></name> <name><surname>Mount</surname> <given-names>S. M.</given-names></name> <name><surname>Wortman</surname> <given-names>J. R.</given-names></name> <name><surname>Smith</surname> <given-names>R. K.</given-names></name> <name><surname>Hannick</surname> <given-names>L. I.</given-names></name><etal/></person-group> (<year>2003</year>). <article-title>Improving the Arabidopsis genome annotation using maximal transcript alignment assemblies.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>31</volume> <fpage>5654</fpage>&#x2013;<lpage>5666</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkg770</pub-id> <pub-id pub-id-type="pmid">14500829</pub-id></citation></ref>
<ref id="B14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haas</surname> <given-names>B. J.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name> <name><surname>Zhu</surname> <given-names>W.</given-names></name> <name><surname>Pertea</surname> <given-names>M.</given-names></name> <name><surname>Allen</surname> <given-names>J. E.</given-names></name> <name><surname>Orvis</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>Automated eukaryotic gene structure annotation using EVidenceModeler and the Program to Assemble Spliced Alignments.</article-title> <source><italic>Genome Biol.</italic></source> <volume>9</volume> <fpage>1</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1186/gb-2008-9-1-r7</pub-id> <pub-id pub-id-type="pmid">18190707</pub-id></citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Henshall</surname> <given-names>J. M.</given-names></name> <name><surname>Dierens</surname> <given-names>L.</given-names></name> <name><surname>Sellars</surname> <given-names>M. J.</given-names></name></person-group> (<year>2014</year>). <article-title>Quantitative analysis of low-density SNP data for parentage assignment and estimation of family contributions to pooled samples.</article-title> <source><italic>Genet. Sel. Evol.</italic></source> <volume>46</volume> <fpage>1</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.1186/s12711-014-0051-y</pub-id> <pub-id pub-id-type="pmid">25183297</pub-id></citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Holthuis</surname> <given-names>L. B.</given-names></name> <name><surname>Collette</surname> <given-names>B. B.</given-names></name> <name><surname>Nauen</surname> <given-names>C. E.</given-names></name></person-group> (<year>1980</year>). <source><italic>FAO species catalogue.</italic></source> <publisher-loc>Rome</publisher-loc>: <publisher-name>FAO</publisher-name>.</citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Janpoom</surname> <given-names>S.</given-names></name> <name><surname>Kaewduang</surname> <given-names>M.</given-names></name> <name><surname>Prasertlux</surname> <given-names>S.</given-names></name> <name><surname>Rongmung</surname> <given-names>P.</given-names></name> <name><surname>Ratdee</surname> <given-names>O.</given-names></name> <name><surname>Lirdwitayaprasit</surname> <given-names>T.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>A SNP of the hemocyanin gene (LvHc) is a marker for high growth and ammonia-tolerance in Pacific white shrimp Litopenaeus vannamei.</article-title> <source><italic>Fish Shellf. Immunol.</italic></source> <volume>106</volume> <fpage>491</fpage>&#x2013;<lpage>501</lpage>. <pub-id pub-id-type="doi">10.1016/j.fsi.2020.07.058</pub-id> <pub-id pub-id-type="pmid">32750547</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jones</surname> <given-names>D. B.</given-names></name> <name><surname>Jerry</surname> <given-names>D. R.</given-names></name> <name><surname>Khatkar</surname> <given-names>M. S.</given-names></name> <name><surname>Raadsma</surname> <given-names>H. W.</given-names></name> <name><surname>van der Steen</surname> <given-names>H.</given-names></name> <name><surname>Prochaska</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>A comparative integrated gene-based linkage and locus ordering by linkage disequilibrium map for the Pacific white shrimp, Litopenaeus vannamei.</article-title> <source><italic>Sci. Rep.</italic></source> <volume>7</volume> <fpage>1</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-017-10515-7</pub-id> <pub-id pub-id-type="pmid">28871114</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jurka</surname> <given-names>J.</given-names></name> <name><surname>Kapitonov</surname> <given-names>V. V.</given-names></name> <name><surname>Pavlicek</surname> <given-names>A.</given-names></name> <name><surname>Klonowski</surname> <given-names>P.</given-names></name> <name><surname>Kohany</surname> <given-names>O.</given-names></name> <name><surname>Walichiewicz</surname> <given-names>J.</given-names></name></person-group> (<year>2005</year>). <article-title>Repbase Update, a database of eukaryotic repetitive elements.</article-title> <source><italic>Cytogenet. Genome Res.</italic></source> <volume>110</volume> <fpage>462</fpage>&#x2013;<lpage>467</lpage>. <pub-id pub-id-type="doi">10.1159/000084979</pub-id> <pub-id pub-id-type="pmid">16093699</pub-id></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kajitani</surname> <given-names>R.</given-names></name> <name><surname>Toshimoto</surname> <given-names>K.</given-names></name> <name><surname>Noguchi</surname> <given-names>H.</given-names></name> <name><surname>Toyoda</surname> <given-names>A.</given-names></name> <name><surname>Ogura</surname> <given-names>Y.</given-names></name> <name><surname>Okuno</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Efficient de novo assembly of highly heterozygous genomes from whole-genome shotgun short reads.</article-title> <source><italic>Genome Res.</italic></source> <volume>24</volume> <fpage>1384</fpage>&#x2013;<lpage>1395</lpage>. <pub-id pub-id-type="doi">10.1101/gr.170720.113</pub-id> <pub-id pub-id-type="pmid">24755901</pub-id></citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kalbfleisch</surname> <given-names>T. S.</given-names></name> <name><surname>Rice</surname> <given-names>E. S.</given-names></name> <name><surname>DePriest</surname> <given-names>M. S.</given-names></name> <name><surname>Walenz</surname> <given-names>B. P.</given-names></name> <name><surname>Hestand</surname> <given-names>M. S.</given-names></name> <name><surname>Vermeesch</surname> <given-names>J. R.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>Improved reference genome for the domestic horse increases assembly contiguity and composition.</article-title> <source><italic>Commun. Biol.</italic></source> <volume>1</volume> <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1038/s42003-018-0199-z</pub-id> <pub-id pub-id-type="pmid">30456315</pub-id></citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kanehisa</surname> <given-names>M.</given-names></name> <name><surname>Goto</surname> <given-names>S.</given-names></name></person-group> (<year>2000</year>). <article-title>KEGG: kyoto encyclopedia of genes and genomes.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>28</volume> <fpage>27</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1093/nar/28.1.27</pub-id> <pub-id pub-id-type="pmid">10592173</pub-id></citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Katneni</surname> <given-names>V. K.</given-names></name> <name><surname>Shekhar</surname> <given-names>M. S.</given-names></name> <name><surname>Jangam</surname> <given-names>A. K.</given-names></name> <name><surname>Prabhudas</surname> <given-names>S. K.</given-names></name> <name><surname>Krishnan</surname> <given-names>K.</given-names></name> <name><surname>Kaikkolante</surname> <given-names>N.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Novel isoform sequencing based full-length transcriptome resource for Indian white shrimp, Penaeus indicus.</article-title> <source><italic>Front. Mar. Sci.</italic></source> <volume>7</volume> <fpage>1</fpage>&#x2013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.3389/fmars.2020.605098</pub-id></citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kent</surname> <given-names>W. J.</given-names></name></person-group> (<year>2002</year>). <article-title>BLAT&#x2014;The BLAST-like alignment tool.</article-title> <source><italic>Genome Res.</italic></source> <volume>12</volume> <fpage>656</fpage>&#x2013;<lpage>664</lpage>. <pub-id pub-id-type="doi">10.1101/gr.229202</pub-id> <pub-id pub-id-type="pmid">11932250</pub-id></citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>D.</given-names></name> <name><surname>Paggi</surname> <given-names>J. M.</given-names></name> <name><surname>Park</surname> <given-names>C.</given-names></name> <name><surname>Bennett</surname> <given-names>C.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2019</year>). <article-title>Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>37</volume> <fpage>907</fpage>&#x2013;<lpage>915</lpage>. <pub-id pub-id-type="doi">10.1038/s41587-019-0201-4</pub-id> <pub-id pub-id-type="pmid">31375807</pub-id></citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>K&#x00FC;ck</surname> <given-names>P.</given-names></name> <name><surname>Meusemann</surname> <given-names>K.</given-names></name></person-group> (<year>2010</year>). <article-title>FASconCAT: Convenient handling of data matrices.</article-title> <source><italic>Mol. Phylogenet. Evol.</italic></source> <volume>56</volume> <fpage>1115</fpage>&#x2013;<lpage>1118</lpage>. <pub-id pub-id-type="doi">10.1016/j.ympev.2010.04.024</pub-id> <pub-id pub-id-type="pmid">20416383</pub-id></citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Langmead</surname> <given-names>B.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2012</year>). <article-title>Fast gapped-read alignment with Bowtie 2.</article-title> <source><italic>Nat. Methods</italic></source> <volume>9</volume>:<issue>357</issue>. <pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id> <pub-id pub-id-type="pmid">22388286</pub-id></citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name></person-group> (<year>2011</year>). <article-title>A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data.</article-title> <source><italic>Bioinformatics</italic></source> <volume>27</volume> <fpage>2987</fpage>&#x2013;<lpage>2993</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr509</pub-id> <pub-id pub-id-type="pmid">21903627</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Durbin</surname> <given-names>R.</given-names></name></person-group> (<year>2009</year>). <article-title>Fast and accurate short read alignment with Burrows-Wheeler transform.</article-title> <source><italic>Bioinformatics</italic></source> <volume>25</volume> <fpage>1754</fpage>&#x2013;<lpage>1760</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp324</pub-id> <pub-id pub-id-type="pmid">19451168</pub-id></citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Handsaker</surname> <given-names>B.</given-names></name> <name><surname>Wysoker</surname> <given-names>A.</given-names></name> <name><surname>Fennell</surname> <given-names>T.</given-names></name> <name><surname>Ruan</surname> <given-names>J.</given-names></name> <name><surname>Homer</surname> <given-names>N.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools.</article-title> <source><italic>Bioinformatics</italic></source> <volume>25</volume> <fpage>2078</fpage>&#x2013;<lpage>2079</lpage>.</citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lomsadze</surname> <given-names>A.</given-names></name> <name><surname>Ter-Hovhannisyan</surname> <given-names>V.</given-names></name> <name><surname>Chernoff</surname> <given-names>Y. O.</given-names></name> <name><surname>Borodovsky</surname> <given-names>M.</given-names></name></person-group> (<year>2005</year>). <article-title>Gene identification in novel eukaryotic genomes by self-training algorithm.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>33</volume> <fpage>6494</fpage>&#x2013;<lpage>6506</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gki937</pub-id> <pub-id pub-id-type="pmid">16314312</pub-id></citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Low</surname> <given-names>W. Y.</given-names></name> <name><surname>Tearle</surname> <given-names>R.</given-names></name> <name><surname>Bickhart</surname> <given-names>D. M.</given-names></name> <name><surname>Rosen</surname> <given-names>B. D.</given-names></name> <name><surname>Kingan</surname> <given-names>S. B.</given-names></name> <name><surname>Swale</surname> <given-names>T.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Chromosome-level assembly of the water buffalo genome surpasses human and goat genomes in sequence contiguity.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>10</volume> <fpage>1</fpage>&#x2013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1038/s41467-018-08260-0</pub-id> <pub-id pub-id-type="pmid">30651564</pub-id></citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>R.</given-names></name> <name><surname>Liu</surname> <given-names>B.</given-names></name> <name><surname>Xie</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Huang</surname> <given-names>W.</given-names></name> <name><surname>Yuan</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Erratum to SOAPdenovo2: An empirically improved memory-efficient short-read de novo assembler.</article-title> <source><italic>Gigascience</italic></source> <volume>4</volume>:<issue>30</issue>. <pub-id pub-id-type="doi">10.1186/s13742-015-0069-2</pub-id> <pub-id pub-id-type="pmid">26161257</pub-id></citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Michael</surname> <given-names>T. P.</given-names></name> <name><surname>Jupe</surname> <given-names>F.</given-names></name> <name><surname>Bemm</surname> <given-names>F.</given-names></name> <name><surname>Motley</surname> <given-names>S. T.</given-names></name> <name><surname>Sandoval</surname> <given-names>J. P.</given-names></name> <name><surname>Lanz</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>High contiguity Arabidopsis thaliana genome assembly with a single nanopore flow cell.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>9</volume> <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1038/s41467-018-03016-2</pub-id> <pub-id pub-id-type="pmid">29416032</pub-id></citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nishimura</surname> <given-names>O.</given-names></name> <name><surname>Hara</surname> <given-names>Y.</given-names></name> <name><surname>Kuraku</surname> <given-names>S.</given-names></name></person-group> (<year>2017</year>). <article-title>GVolante for standardizing completeness assessment of genome and transcriptome assemblies.</article-title> <source><italic>Bioinformatics</italic></source> <volume>33</volume> <fpage>3635</fpage>&#x2013;<lpage>3637</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx445</pub-id> <pub-id pub-id-type="pmid">29036533</pub-id></citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Parra</surname> <given-names>G.</given-names></name> <name><surname>Bradnam</surname> <given-names>K.</given-names></name> <name><surname>Korf</surname> <given-names>I.</given-names></name></person-group> (<year>2007</year>). <article-title>CEGMA: A pipeline to accurately annotate core genes in eukaryotic genomes.</article-title> <source><italic>Bioinformatics</italic></source> <volume>23</volume> <fpage>1061</fpage>&#x2013;<lpage>1067</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btm071</pub-id> <pub-id pub-id-type="pmid">17332020</pub-id></citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Perez-Enriquez</surname> <given-names>R.</given-names></name> <name><surname>Robledo</surname> <given-names>D.</given-names></name> <name><surname>Houston</surname> <given-names>R. D.</given-names></name> <name><surname>Llera-Herrera</surname> <given-names>R.</given-names></name></person-group> (<year>2018</year>). <article-title>SNP markers for the genetic characterization of Mexican shrimp broodstocks.</article-title> <source><italic>Genomics</italic></source> <volume>110</volume> <fpage>423</fpage>&#x2013;<lpage>429</lpage>.</citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pertea</surname> <given-names>M.</given-names></name> <name><surname>Pertea</surname> <given-names>G. M.</given-names></name> <name><surname>Antonescu</surname> <given-names>C. M.</given-names></name> <name><surname>Chang</surname> <given-names>T. C.</given-names></name> <name><surname>Mendell</surname> <given-names>J. T.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2015</year>). <article-title>StringTie enables improved reconstruction of a transcriptome from RNA-seq reads.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>33</volume> <fpage>290</fpage>&#x2013;<lpage>295</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.3122</pub-id> <pub-id pub-id-type="pmid">25690850</pub-id></citation></ref>
<ref id="B39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Perumal</surname> <given-names>S.</given-names></name> <name><surname>Koh</surname> <given-names>C. S.</given-names></name> <name><surname>Jin</surname> <given-names>L.</given-names></name> <name><surname>Buchwaldt</surname> <given-names>M.</given-names></name> <name><surname>Higgins</surname> <given-names>E. E.</given-names></name> <name><surname>Zheng</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>A high-contiguity Brassica nigra genome localizes active centromeres and defines the ancestral Brassica genome.</article-title> <source><italic>Nat. Plants</italic></source> <volume>6</volume> <fpage>929</fpage>&#x2013;<lpage>941</lpage>. <pub-id pub-id-type="doi">10.1038/s41477-020-0735-y</pub-id> <pub-id pub-id-type="pmid">32782408</pub-id></citation></ref>
<ref id="B40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pryszcz</surname> <given-names>L. P.</given-names></name> <name><surname>Gabald&#x00F3;n</surname> <given-names>T.</given-names></name></person-group> (<year>2016</year>). <article-title>Redundans: An assembly pipeline for highly heterozygous genomes.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>44</volume>:<issue>e113</issue>. <pub-id pub-id-type="doi">10.1093/nar/gkw294</pub-id> <pub-id pub-id-type="pmid">27131372</pub-id></citation></ref>
<ref id="B41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rambaut</surname> <given-names>A.</given-names></name></person-group> (<year>2009</year>). <source><italic>FigTree</italic>. <italic>Tree Figure Drawing Tool.</italic></source> Available online at: <ext-link ext-link-type="uri" xlink:href="http://tree.bio.ed.ac.uk/software/figtree/">http://tree.bio.ed.ac.uk/software/figtree/</ext-link> <comment>(accessed November 25, 2018)</comment>.</citation></ref>
<ref id="B42"><citation citation-type="journal"><collab>Reference Standard For Genome Biology</collab> (<year>2018</year>). <article-title>A reference standard for genome biology.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>36</volume>:<issue>1121</issue>.</citation></ref>
<ref id="B43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Robinson</surname> <given-names>N. A.</given-names></name> <name><surname>Gopikrishna</surname> <given-names>G.</given-names></name> <name><surname>Baranski</surname> <given-names>M.</given-names></name> <name><surname>Katneni</surname> <given-names>V. K.</given-names></name> <name><surname>Shekhar</surname> <given-names>M. S.</given-names></name> <name><surname>Shanmugakarthik</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>QTL for white spot syndrome virus resistance and the sex-determining locus in the Indian black tiger shrimp (Penaeus monodon).</article-title> <source><italic>BMC Genomics</italic></source> <volume>15</volume>:<issue>731</issue>.</citation></ref>
<ref id="B44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ruan</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>H.</given-names></name></person-group> (<year>2020</year>). <article-title>Fast and accurate long-read assembly with wtdbg2.</article-title> <source><italic>Nat. Methods</italic></source> <volume>17</volume> <fpage>155</fpage>&#x2013;<lpage>158</lpage>.</citation></ref>
<ref id="B45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Santos</surname> <given-names>C. A.</given-names></name> <name><surname>Andrade</surname> <given-names>S. C. S.</given-names></name> <name><surname>Freitas</surname> <given-names>P. D.</given-names></name></person-group> (<year>2018</year>). <article-title>Identification of SNPs potentially related to immune responses and growth performance in Litopenaeus vannamei by RNA-seq analyses.</article-title> <source><italic>PeerJ</italic></source> <volume>2018</volume> <fpage>1</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.7717/peerj.5154</pub-id> <pub-id pub-id-type="pmid">30013834</pub-id></citation></ref>
<ref id="B46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sellars</surname> <given-names>M. J.</given-names></name> <name><surname>Dierens</surname> <given-names>L.</given-names></name> <name><surname>McWilliam</surname> <given-names>S.</given-names></name> <name><surname>Little</surname> <given-names>B.</given-names></name> <name><surname>Murphy</surname> <given-names>B.</given-names></name> <name><surname>Coman</surname> <given-names>G. J.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Comparison of microsatellite and SNP DNA markers for pedigree assignment in Black Tiger shrimp, Penaeus monodon.</article-title> <source><italic>Aquac. Res.</italic></source> <volume>45</volume> <fpage>417</fpage>&#x2013;<lpage>426</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-2109.2012.03243.x</pub-id></citation></ref>
<ref id="B47"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Seppey</surname> <given-names>M.</given-names></name> <name><surname>Manni</surname> <given-names>M.</given-names></name> <name><surname>Zdobnov</surname> <given-names>E. M.</given-names></name></person-group> (<year>2019</year>). <article-title>BUSCO: assessing genome assembly and annotation completeness,</article-title> in <source><italic>Gene Prediction.</italic></source> <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Springer</publisher-name>, <fpage>227</fpage>&#x2013;<lpage>245</lpage>.</citation></ref>
<ref id="B48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stamatakis</surname> <given-names>A.</given-names></name></person-group> (<year>2014</year>). <article-title>RAxML version 8: a tool for phylogenetic analysis and post-analysis of large phylogenies.</article-title> <source><italic>Bioinformatics</italic></source> <volume>30</volume> <fpage>1312</fpage>&#x2013;<lpage>1313</lpage>.</citation></ref>
<ref id="B49"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stanke</surname> <given-names>M.</given-names></name> <name><surname>Keller</surname> <given-names>O.</given-names></name> <name><surname>Gunduz</surname> <given-names>I.</given-names></name> <name><surname>Hayes</surname> <given-names>A.</given-names></name> <name><surname>Waack</surname> <given-names>S.</given-names></name> <name><surname>Morgenstern</surname> <given-names>B.</given-names></name></person-group> (<year>2006</year>). <article-title>AUGUSTUS: Ab initio prediction of alternative transcripts.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>34</volume> <fpage>435</fpage>&#x2013;<lpage>439</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkl200</pub-id> <pub-id pub-id-type="pmid">16845043</pub-id></citation></ref>
<ref id="B50"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Swathi</surname> <given-names>A.</given-names></name> <name><surname>Shekhar</surname> <given-names>M. S.</given-names></name> <name><surname>Katneni</surname> <given-names>V. K.</given-names></name> <name><surname>Vijayan</surname> <given-names>K. K.</given-names></name></person-group> (<year>2018</year>). <article-title>Genome size estimation of brackishwater fishes and penaeid shrimps by flow cytometry.</article-title> <source><italic>Mol. Biol. Rep.</italic></source> <volume>45</volume> <fpage>951</fpage>&#x2013;<lpage>960</lpage>.</citation></ref>
<ref id="B51"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tang</surname> <given-names>B.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>Q.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Jiang</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>High-quality genome assembly of Eriocheir japonica sinensis reveals its unique genome evolution.</article-title> <source><italic>Front. Genet.</italic></source> <volume>10</volume>:<fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.3389/fgene.2019.01340</pub-id> <pub-id pub-id-type="pmid">32010195</pub-id></citation></ref>
<ref id="B52"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tang</surname> <given-names>H.</given-names></name> <name><surname>Thomas</surname> <given-names>P. D.</given-names></name></person-group> (<year>2016</year>). <article-title>PANTHER-PSEP: Predicting disease-causing genetic variants using position-specific evolutionary preservation.</article-title> <source><italic>Bioinformatics</italic></source> <volume>32</volume> <fpage>2230</fpage>&#x2013;<lpage>2232</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw222</pub-id> <pub-id pub-id-type="pmid">27193693</pub-id></citation></ref>
<ref id="B53"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Trapnell</surname> <given-names>C.</given-names></name> <name><surname>Roberts</surname> <given-names>A.</given-names></name> <name><surname>Goff</surname> <given-names>L.</given-names></name> <name><surname>Pertea</surname> <given-names>G.</given-names></name> <name><surname>Kim</surname> <given-names>D.</given-names></name> <name><surname>Kelley</surname> <given-names>D. R.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>Differential gene and transcript expression analysis of RNA-seq experiments with TopHat and Cufflinks.</article-title> <source><italic>Nat. Protoc.</italic></source> <volume>7</volume> <fpage>562</fpage>&#x2013;<lpage>578</lpage>. <pub-id pub-id-type="doi">10.1038/nprot.2012.016</pub-id> <pub-id pub-id-type="pmid">22383036</pub-id></citation></ref>
<ref id="B54"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Uengwetwanit</surname> <given-names>T.</given-names></name> <name><surname>Pootakham</surname> <given-names>W.</given-names></name> <name><surname>Nookaew</surname> <given-names>I.</given-names></name> <name><surname>Sonthirod</surname> <given-names>C.</given-names></name> <name><surname>Angthong</surname> <given-names>P.</given-names></name> <name><surname>Sittikankaew</surname> <given-names>K.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>A chromosome-level assembly of the black tiger shrimp (Penaeus monodon) genome facilitates the identification of growth-associated genes.</article-title> <source><italic>Mol. Ecol. Resour.</italic></source> <volume>2021</volume> <fpage>1</fpage>&#x2013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1111/1755-0998.13357</pub-id> <pub-id pub-id-type="pmid">33586292</pub-id></citation></ref>
<ref id="B55"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Van Quyen</surname> <given-names>D.</given-names></name> <name><surname>Gan</surname> <given-names>H. M.</given-names></name> <name><surname>Lee</surname> <given-names>Y. P.</given-names></name> <name><surname>Nguyen</surname> <given-names>D. D.</given-names></name> <name><surname>Nguyen</surname> <given-names>T. H.</given-names></name> <name><surname>Tran</surname> <given-names>X. T.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Improved genomic resources for the black tiger prawn (Penaeus monodon).</article-title> <source><italic>Mar. Genomics</italic></source> <volume>52</volume>:<issue>100751</issue>. <pub-id pub-id-type="doi">10.1016/j.margen.2020.100751</pub-id> <pub-id pub-id-type="pmid">32033920</pub-id></citation></ref>
<ref id="B56"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vu</surname> <given-names>N. T. T.</given-names></name> <name><surname>Zenger</surname> <given-names>K. R.</given-names></name> <name><surname>Guppy</surname> <given-names>J. L.</given-names></name> <name><surname>Sellars</surname> <given-names>M. J.</given-names></name> <name><surname>Silva</surname> <given-names>C. N. S.</given-names></name> <name><surname>Kjeldsen</surname> <given-names>S. R.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Fine-scale population structure and evidence for local adaptation in Australian giant black tiger shrimp (Penaeus monodon) using SNP analysis.</article-title> <source><italic>BMC Genomics</italic></source> <volume>21</volume>:<fpage>1</fpage>&#x2013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1186/s12864-020-07084-x</pub-id> <pub-id pub-id-type="pmid">32993495</pub-id></citation></ref>
<ref id="B57"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>T. D.</given-names></name> <name><surname>Watanabe</surname> <given-names>C. K.</given-names></name></person-group> (<year>2005</year>). <article-title>GMAP: A genomic mapping and alignment program for mRNA and EST sequences.</article-title> <source><italic>Bioinformatics</italic></source> <volume>21</volume> <fpage>1859</fpage>&#x2013;<lpage>1875</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bti310</pub-id> <pub-id pub-id-type="pmid">15728110</pub-id></citation></ref>
<ref id="B58"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Yuan</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Zhao</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Genome survey and high-density genetic map construction provide genomic and genetic resources for the Pacific White Shrimp Litopenaeus vannamei.</article-title> <source><italic>Sci. Rep.</italic></source> <volume>5</volume> <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1038/srep15612</pub-id> <pub-id pub-id-type="pmid">26503227</pub-id></citation></ref>
<ref id="B59"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yuan</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Yu</surname> <given-names>Y.</given-names></name> <name><surname>Wei</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>Genomic resources and comparative analyses of two economical penaeid shrimp species, Marsupenaeus japonicus and Penaeus monodon.</article-title> <source><italic>Mar. Genomics</italic></source> <volume>39</volume> <fpage>22</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1016/j.margen.2017.12.006</pub-id></citation></ref>
<ref id="B60"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yuan</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>M.</given-names></name> <name><surname>Sun</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Simple sequence repeats drive genome plasticity and promote adaptive evolution in penaeid shrimp.</article-title> <source><italic>Commun. Biol.</italic></source> <volume>4</volume> <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1038/s42003-021-01716-y</pub-id> <pub-id pub-id-type="pmid">33574498</pub-id></citation></ref>
<ref id="B61"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Q.</given-names></name> <name><surname>Yu</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Q.</given-names></name> <name><surname>Liu</surname> <given-names>F.</given-names></name> <name><surname>Luo</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Identification of single nucleotide polymorphisms related to the resistance against acute hepatopancreatic necrosis disease in the Pacific white shrimp Litopenaeus vannamei by target sequencing approach.</article-title> <source><italic>Front. Genet.</italic></source> <volume>10</volume>:<fpage>1</fpage>&#x2013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.3389/fgene.2019.00700</pub-id> <pub-id pub-id-type="pmid">31428134</pub-id></citation></ref>
<ref id="B62"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Yuan</surname> <given-names>J.</given-names></name> <name><surname>Sun</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Gao</surname> <given-names>Y.</given-names></name> <name><surname>Yu</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Penaeid shrimp genome provides insights into benthic adaptation and frequent molting.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>10</volume> <fpage>1</fpage>&#x2013;<lpage>14</lpage>.</citation></ref>
<ref id="B63"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Scheuring</surname> <given-names>C.</given-names></name> <name><surname>Zhang</surname> <given-names>H.-B.</given-names></name> <name><surname>Huan</surname> <given-names>P.</given-names></name> <name><surname>Wang</surname> <given-names>B.</given-names></name><etal/></person-group> (<year>2010</year>). <article-title>Construction and characterization of a bacterial artificial chromosome (BAC) library of Pacific white shrimp, Litopenaeus vannamei.</article-title> <source><italic>Mar. Biotechnol.</italic></source> <volume>12</volume> <fpage>141</fpage>&#x2013;<lpage>149</lpage>.</citation></ref>
<ref id="B64"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zimin</surname> <given-names>A. V.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2020</year>). <article-title>The genome polishing tool POLCA makes fast and accurate corrections in genome assemblies.</article-title> <source><italic>PLoS Comput. Biol.</italic></source> <volume>16</volume>:<fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1007981</pub-id> <pub-id pub-id-type="pmid">32589667</pub-id></citation></ref>
</ref-list>
<fn-group>
<fn id="footnote1">
<label>1</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/PacificBiosciences/GenomicConsensus">https://github.com/PacificBiosciences/GenomicConsensus</ext-link></p></fn>
<fn id="footnote2">
<label>2</label>
<p><ext-link ext-link-type="uri" xlink:href="http://repeatmasker.org">http://repeatmasker.org</ext-link></p></fn>
<fn id="footnote3">
<label>3</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/TransDecoder">https://github.com/TransDecoder</ext-link></p></fn>
<fn id="footnote4">
<label>4</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov">http://www.ncbi.nlm.nih.gov</ext-link></p></fn>
</fn-group>
</back>
</article>
