<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1308527</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2024.1308527</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Revisiting genomes of non-model species with long reads yields new insights into their biology and evolution</article-title>
<alt-title alt-title-type="left-running-head">Guiglielmoni et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2024.1308527">10.3389/fgene.2024.1308527</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Guiglielmoni</surname>
<given-names>Nad&#xe8;ge</given-names>
</name>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1526160/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Villegas</surname>
<given-names>Laura I.</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/2603992/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kirangwa</surname>
<given-names>Joseph</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Schiffer</surname>
<given-names>Philipp H.</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/2281088/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff>
<institution>Institut f&#xfc;r Zoologie</institution>, <institution>Universit&#xe4;t zu K&#xf6;ln</institution>, <addr-line>Cologne</addr-line>, <country>Germany</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/23877/overview">Richard D. Emes</ext-link>, Nottingham Trent University, United Kingdom</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/650860/overview">Thomas Hackl</ext-link>, University of Groningen, Netherlands</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2170905/overview">Iraad F. Bronner</ext-link>, Wellcome Sanger Institute (WT), United Kingdom</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Nad&#xe8;ge Guiglielmoni, <email>nguiglie@uni-koeln.de</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>07</day>
<month>02</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1308527</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>10</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>01</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Guiglielmoni, Villegas, Kirangwa and Schiffer.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Guiglielmoni, Villegas, Kirangwa and Schiffer</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>High-quality genomes obtained using long-read data not only allow for a better understanding of heterozygosity levels and repeat content and for more accurate gene annotation and prediction than those obtained with short-read technologies, but also make it possible to study haplotype divergence. Advances in long-read sequencing technologies in recent years have made it possible to produce such high-quality assemblies for non-model organisms. This allows us to revisit genomes that have been problematic to scaffold to chromosome scale with previous generations of data and assembly software. Nematoda, one of the most diverse and speciose animal phyla within metazoans, remains poorly studied, and many previously assembled genomes are fragmented. Using long reads obtained with Nanopore R10.4.1 and PacBio HiFi, we generated highly contiguous assemblies of a diploid nematode of the Mermithidae family, for which no closely related genomes are available to date, as well as a collapsed assembly and a phased assembly for a triploid nematode from the Panagrolaimidae family. Both genomes had been analysed before, but the fragmented assemblies had scaffold sizes comparable to the length of long reads prior to assembly. Our new assemblies illustrate how long-read technologies allow for a much better representation of species genomes. We are now able to conduct more accurate downstream assays based on more complete gene and transposable element predictions.</p>
</abstract>
<kwd-group>
<kwd>Nematoda</kwd>
<kwd>genomics</kwd>
<kwd>long reads</kwd>
<kwd>genome assembly</kwd>
<kwd>genome annotation</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Genomics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Over the past decade, the field of genome assembly has experienced major improvements fueled by the development of high throughput sequencing techniques and major increases in the length and accuracy of reads. Short-read sequencing prompted the release of many draft assemblies for a large variety of species. The limited length of these reads could only yield highly fragmented assemblies, which were sufficient for initial analyses of gene content, but could not account for the structure of genomes and often fell short on resolving repetitive regions (<xref ref-type="bibr" rid="B41">Rice and Green, 2019</xref>). Recent advances in genome assembly have been driven by the availability of long reads offered by Pacific Biosciences (PacBio) and Oxford Nanopore. While these reads initially had a high error rate, improvements of these technologies have drastically increased their accuracy to over 99%, with the release of PacBio HiFi reads (based on circular consensus sequencing) (<xref ref-type="bibr" rid="B51">Wenger et al. 2019</xref>) and Nanopore Q20&#x2b; (<xref ref-type="bibr" rid="B45">Sereika et al. 2021</xref>) reads obtained using R10.4.1 flow cells. These developments have brought draft assemblies to Megabase-level N50s (<xref ref-type="bibr" rid="B16">Guiglielmoni et al. 2022</xref>), illustrating their high contiguity, and have opened new possibilities for genome analyses. Assemblies obtained with long-read data not only have a higher gene completeness, but they can also provide a more comprehensive overview of repetitive regions and potentially allow for a better understanding of their structure, activity and dynamics (<xref ref-type="bibr" rid="B46">Shahid and Slotkin 2020</xref>). In addition, high-accuracy long reads can be used to discriminate alleles and generate phased assemblies, including all haplotypes (<xref ref-type="bibr" rid="B6">Cheng et al. 2021</xref>; <xref ref-type="bibr" rid="B39">Rautiainen et al. 
2023</xref>), while low-accuracy long reads were only sufficient for collapsed assemblies (in which homologous chromosomes are represented by a single sequence) as errors could not be distinguished from alternative haplotypes.</p>
<p>Some hundred genome assemblies have been released thus far for the phylum Nematoda, yet only a few are high-quality assemblies and they offer a poor representation of the diversity of the taxon for which over 30,000 species have been described (<xref ref-type="bibr" rid="B17">Hodda 2022</xref>). In particular, efforts have focused on <italic>Caenorhabditis</italic> and parasitic species, leaving incomplete resources for understudied clades (<xref ref-type="bibr" rid="B23">Kumar et al. 2012b</xref>; <xref ref-type="bibr" rid="B22">Kumar et al. 2012a</xref>). In this paper, we focus on the genomes of two species at two extremities of the nematode phylogeny: the basal <italic>Romanomermis culicivorax</italic> (clade I) and the derived <italic>Panagrolaimus</italic> sp. PS1159 (clade IV).</p>
<p>The Enoplean nematode <italic>Romanomermis culicivorax</italic> is a member of the Mermithidae family, which includes over 100 described species (<xref ref-type="bibr" rid="B37">Presswell et al. 2015</xref>). It is an obligate parasite of various species of mosquito larvae (<xref ref-type="bibr" rid="B12">Giblin and Platzer 1985</xref>). Along with other mermithid nematodes, it is presently employed for the biological control of malaria (<xref ref-type="bibr" rid="B36">Petersen et al. 1978</xref>; <xref ref-type="bibr" rid="B1">Abagli et al. 2019</xref>). Enoplean research often revolves around <italic>Trichinella spiralis</italic>, given its significance as a mammalian parasite (<xref ref-type="bibr" rid="B30">Mitreva et al. 2011</xref>). Among Mermithidae, only two genomes are currently publicly available (<xref ref-type="bibr" rid="B4">Bhattarai et al. 2022</xref>; <xref ref-type="bibr" rid="B44">Schiffer et al. 2013</xref>). In contrast with the published assembly of the sexual <italic>R. culicivorax</italic>, the long-read genome assembly of the parthenogenetic <italic>Mermis nigrescens</italic> is more contiguous and contains approximately twice the repeat content and heterozygosity. The need for additional high-quality genomes is evident, not only to address resource gaps in the Enoplean class, but also to enable investigations into sexual evolution, genome structural variations, and host-parasite interactions within the Mermithidae family.</p>
<p>
<italic>Panagrolaimus</italic> sp. PS1159 is a free-living nematode belonging to the Panagrolaimidae family. Members of this family have various reproductive modes including hermaphroditism, outcrossing between males and females and asexual reproduction through parthenogenesis (<xref ref-type="bibr" rid="B25">Lewis et al. 2009</xref>); <italic>Panagrolaimus</italic> sp. PS1159 is parthenogenetic. This strain was isolated in North Carolina, United States, by Paul Sternberg, and is thought to be a triploid allopolyploid (3n &#x3d; 12) (<xref ref-type="bibr" rid="B43">Schiffer et al. 2019</xref>). Previous studies have found that it shares a common origin of parthenogenesis with most <italic>Panagrolaimus</italic> asexual strains, from a hybridization event estimated to have occurred 1.3&#x2013;8.5 million years ago (<xref ref-type="bibr" rid="B43">Schiffer et al. 2019</xref>; <xref ref-type="bibr" rid="B48">Shatilovich et al. 2023</xref>). To date, over 140 strains of the genus have been documented (NCBI Taxonomy Browser); however, only nine largely fragmented draft genome assemblies are available on GenBank (accessed on 06.10.2023). This widely distributed group includes strains isolated from extreme environments such as Antarctica, the volcanic island of Surtsey and the Russian permafrost. Representatives of the genus from these locations have been found to be freezing-tolerant and to undergo cryptobiosis (<xref ref-type="bibr" rid="B48">Shatilovich et al. 2023</xref>; <xref ref-type="bibr" rid="B29">McGill et al. 2015</xref>), and <italic>Panagrolaimus</italic> sp. PS1159 has also shown anhydrobiotic potential as a fast desiccation strategist (<xref ref-type="bibr" rid="B47">Shannon et al. 2005</xref>).</p>
<p>Short-read genome assemblies are available for both species, yet their high fragmentation impedes downstream analyses. Their scaffold N50s are limited to 17.6&#xa0;kb for <italic>R. culicivorax</italic> and 9.9&#xa0;kb for <italic>P.</italic> sp. PS1159. By contrast, these values would be the expected length for unassembled long reads nowadays. Although these draft assemblies provided a first insight into the genomics of these species, more contiguous assemblies can now be obtained using long reads. To reassemble these species, we chose to generate both PacBio HiFi and Nanopore sequencing data and to leverage distinct advantages of these technologies. For PacBio HiFi, we used an ultra-low input protocol with DNA extracted from only a few individuals and whole genome amplification. For Nanopore sequencing, we extracted DNA from large pools of individuals and selected the largest fragments. Using these heterogeneous long-read datasets, we produced new highly contiguous assemblies with increased completeness.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Pacific Biosciences HiFi sequencing</title>
<p>Up to 10 individuals were collected and washed in water, then flash-frozen using liquid nitrogen in a salt-based extraction buffer (Tris-HCl 100&#xa0;mM, ethylenediaminetetraacetic acid 50&#xa0;mM, NaCl 0.5&#xa0;M and sodium dodecyl sulfate 1%). Samples were incubated overnight at 50&#xb0;C after addition of 5&#xa0;<italic>&#x3bc;</italic>L of proteinase K (Zymo Research D3001-2). DNA was precipitated using NaCl 5&#xa0;M, yeast tRNA and isopropanol, and incubated at room temperature for 30&#xa0;min, then pelleted at 18,000&#xa0;g for 20&#xa0;min (4&#xb0;C). The DNA was washed twice with 80% ethanol and spun at 18,000&#xa0;g for 10&#xa0;min (4&#xb0;C). The DNA pellet was eluted in elution buffer (D3004-4-10 Zymo Research) and incubated at 50&#xb0;C for 10&#xa0;min. RNA was removed by incubating with RNAse (Qiagen, 19101) for 1&#xa0;h at 37&#xb0;C. DNA concentrations were quantified using a Qubit 4 fluorometer with the 1X dsDNA kit. HiFi libraries were prepared with the Express 2.0 Template kit (Pacific Biosciences, Menlo Park, CA, United States) and sequenced on a Sequel II/Sequel IIe instrument with 30&#xa0;h movie time. HiFi reads were generated using SMRT Link (v10, Pacific Biosciences, Menlo Park, CA, United States) with default parameters. Sequencing results are presented in <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>.</p>
</sec>
<sec id="s2-2">
<title>2.2 Nanopore sequencing</title>
<p>
<italic>Romanomermis culicivorax</italic> worms were picked from moss material supplied by Prof. Dr Edward Platzer at the University of California, Riverside. <italic>Panagrolaimus</italic> sp. PS1159 worms (isolate from North Carolina, United States) were harvested from agar plates with water and pelleted at 5,000&#xa0;g for 5&#xa0;min. The <italic>P.</italic> sp. PS1159 pellet was re-suspended in a 1&#xa0;M sucrose solution used for bacterial decontamination (sucrose flotation). The sample was centrifuged at 1,000&#xa0;g for 3&#xa0;min, and the upper 1&#xa0;mL of the supernatant containing the live clean worms was transferred to a new tube and diluted with nuclease-free water. The worms were pelleted again at 5,000&#xa0;g for 5&#xa0;min for further processing. Due to the large input, different DNA extraction protocols were tested as the salting-out protocol used for ultra-low input DNA extraction led to poor purity with many worms. Extractions with the Monarch DNA extraction kit also resulted in suboptimal OD260/230 values. Samples were incubated in cetyltrimethylammonium bromide (CTAB) buffer (polyvinylpyrrolidone 2%, Tris-HCl 100&#xa0;mM, ethylenediaminetetraacetic acid 25&#xa0;mM, NaCl 2&#xa0;M, CTAB 2%) supplemented with 25&#xa0;<italic>&#x3bc;</italic>L of proteinase K (Zymo Research D3001-2) for 1&#xa0;h (<italic>P.</italic> sp. PS1159) or 2&#xa0;h (<italic>R. culicivorax</italic>), until the individuals were dissolved. After further incubation for 10&#xa0;min with 1.0&#xa0;M potassium acetate, extracts were purified with phenol-chloroform-isoamyl alcohol 25:24:1, chloroform-isoamyl alcohol 24:1, centrifugation at 16,000&#xa0;g for 10&#xa0;min (room temperature) and AMPure XP beads (Agencourt). DNA was then incubated with RNAse cocktail enzyme mix (Thermo Fisher, AM2286) for 1&#xa0;h at 37&#xb0;C. Prior trials of the same protocol without the potassium acetate step led to low OD260/230 values. 
DNA was fragmented in a 2&#xa0;mL low-bind round bottom Eppendorf tube using a sterile 3&#xa0;mm borosilicate bead (Z143928-1EA Merck) by vortexing for 1&#xa0;min at maximum speed as described in <xref ref-type="bibr" rid="B19">Koetsier and Cantor (2021)</xref>. Short fragments were removed using the Short Read Eliminator (SRE) (Circulomics, Pacific Biosciences). The DNA samples were incubated with SRE buffer for 1&#xa0;h (50&#xb0;C), then the long fragments of DNA were pelleted at 10,000&#xa0;g for 30&#xa0;min (room temperature) and re-suspended in elution buffer. DNA concentrations were quantified using a Qubit 4 fluorometer with the 1X dsDNA kit.</p>
<p>Nanopore libraries were prepared using the Ligation Sequencing Kit LSK114 (Oxford Nanopore Technologies). The <italic>R. culicivorax</italic> library was loaded a first time on one R10.4 MinION flowcell. The library was recovered from the flowcell and reloaded after nuclease flush. The <italic>P.</italic> sp. PS1159 library was loaded 4 times (with nuclease flushes and fresh library loads) on one R10.4 MinION flowcell. Fast5 files were converted to Pod5 using pod5 v0.2.2. Basecalling was performed using Dorado v0.3.1 (<xref ref-type="bibr" rid="B34">Oxford Nanopore Technologies 2022</xref>) in duplex mode with model dna_r10.4.1_e8.2_400bps_supv4.2.0 and the reads were converted to fastq using SAMtools v1.6 (<xref ref-type="bibr" rid="B8">Danecek et al. 2021</xref>) with the module <monospace>samtools fastq</monospace>. This resulted in 5.7&#xa0;Gb of Nanopore reads for <italic>R. culicivorax</italic> (N50: 15.9&#xa0;kb) and 10.7&#xa0;Gb for <italic>P.</italic> sp. PS1159 (N50: 33.4&#xa0;kb) (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). Adapters were trimmed using chopper v0.5.0 (<xref ref-type="bibr" rid="B9">De Coster and Rademakers 2023</xref>) with minimum quality <monospace>-q</monospace> set to default (for <italic>R. culicivorax</italic> and <italic>P.</italic> sp. PS1159) or 20 (for <italic>P.</italic> sp. PS1159).</p>
</sec>
<sec id="s2-3">
<title>2.3 RNA sequencing</title>
<p>RNA was extracted from <italic>R. culicivorax</italic> adults using a modified version of the protocol established by <xref ref-type="bibr" rid="B7">Chomczynski and Sacchi (1987)</xref>. Tissue pellets of approximately 10&#xa0;mg were transferred into 1&#xa0;mL Trimix and lysed using a homogeniser (Ultra-Turrax, IKA Werke GmbH) for 10&#xa0;min on ice. After addition of 200&#xa0;<italic>&#x3bc;</italic>L chloroform and incubation at room temperature for 5&#xa0;min, the sample was centrifuged for 10&#xa0;min at 15,000&#xa0;g. The aqueous phase was collected and supplemented with 0.025 volumes of 1&#xa0;M acetic acid and 0.5 volumes of pre-cooled 100% EtOH (&#x2212;20&#xb0;C). RNA was precipitated overnight at &#x2212;20&#xb0;C and then centrifuged at 15,000&#xa0;g for 20&#xa0;min. After removing the supernatant, the RNA pellet was dried for 10&#xa0;min and resuspended in 125&#xa0;<italic>&#x3bc;</italic>L of GU-mix; 3.125&#xa0;<italic>&#x3bc;</italic>L of 1&#xa0;M acetic acid was added, the sample was vortexed, and 70&#xa0;<italic>&#x3bc;</italic>L of 100% EtOH was added. RNA was precipitated overnight at &#x2212;20&#xb0;C and then centrifuged at 15,000&#xa0;g for 20&#xa0;min and washed twice with 500&#xa0;<italic>&#x3bc;</italic>L EtOH (70%). The RNA pellet was resuspended in 20&#xa0;<italic>&#x3bc;</italic>L DEPC-H2O and incubated at 65&#xb0;C for 5&#xa0;min. The quality of the total RNA was assessed using denaturing agarose-gel electrophoresis and a Nanodrop 1000 photometer (Agilent Inc.). RNA libraries were prepared using a TruSeq RNA Sample Prep kit v2 (Illumina Inc.) and sequenced on Illumina HiSeq and MiSeq platforms (Illumina Inc.) at the Cologne Center for Genomics (CCG, Cologne, Germany). For <italic>Panagrolaimus</italic> sp. PS1159, publicly available Illumina RNA sequencing reads were used (SRR5253560) (<xref ref-type="bibr" rid="B43">Schiffer et al. 2019</xref>).</p>
</sec>
<sec id="s2-4">
<title>2.4 Long-read preliminary analyses</title>
<p>Quality and length of PacBio HiFi and Nanopore reads were plotted using Nanoplot v1.41.3 (<xref ref-type="bibr" rid="B9">De Coster and Rademakers 2023</xref>). Ploidy was estimated using Smudgeplot v0.2.2 (<xref ref-type="bibr" rid="B38">Ranallo-Benavidez et al. 2020</xref>) with the PacBio HiFi reads.</p>
</sec>
<sec id="s2-5">
<title>2.5 <italic>Romanomermis culicivorax</italic> long-read assembly</title>
<p>PacBio HiFi reads were assembled using Flye v2.9 (<xref ref-type="bibr" rid="B20">Kolmogorov et al. 2019</xref>) with parameter <monospace>--pacbio-hifi</monospace>, hifiasm v0.19 (<xref ref-type="bibr" rid="B6">Cheng et al. 2021</xref>) with parameter <monospace>-l 3</monospace>, NextDenovo v2.5 (<xref ref-type="bibr" rid="B31">NextOmics 2019</xref>) with parameters <monospace>genome_size &#x3d; 300m</monospace> and <monospace>read_type &#x3d; hifi</monospace>, and wtdbg2 v2.5 (<xref ref-type="bibr" rid="B42">Ruan and Li 2020</xref>) with parameter <monospace>-x ccs</monospace>. For Nanopore reads, Canu v2.2 (<xref ref-type="bibr" rid="B21">Koren et al. 2017</xref>) was run with parameters <monospace>-nanopore genomeSize&#x3d;300m</monospace>, Flye v2.9 (<xref ref-type="bibr" rid="B20">Kolmogorov et al. 2019</xref>) with parameter <monospace>--nano-hq</monospace>, NextDenovo v2.5 (<xref ref-type="bibr" rid="B31">NextOmics 2019</xref>) with parameters <monospace>genome_size &#x3d; 300m</monospace> and <monospace>read_type &#x3d; raw</monospace>, and wtdbg2 v2.5 (<xref ref-type="bibr" rid="B42">Ruan and Li 2020</xref>) with parameter <monospace>-x ont</monospace>. To combine PacBio HiFi and Nanopore reads, Nanopore reads longer than 15&#xa0;kb were selected using seqtk v1.3 (<xref ref-type="bibr" rid="B26">Li 2012</xref>) with the module <monospace>seqtk seq</monospace> and the parameter <monospace>-L 15000</monospace>. hifiasm v0.19 was run using the PacBio HiFi reads and Nanopore reads <inline-formula id="inf1">
<mml:math id="m1">
<mml:mo>&#x3e;</mml:mo>
</mml:math>
</inline-formula> 15&#xa0;kb with parameter <monospace>-l 3</monospace>. Assembly using Verkko v1.4 with default parameters failed.</p>
</sec>
<sec id="s2-6">
<title>2.6 <italic>Panagrolaimus</italic> sp. PS1159 long-read assembly</title>
<p>PacBio HiFi reads were assembled using Flye v2.9 (<xref ref-type="bibr" rid="B20">Kolmogorov et al. 2019</xref>) with parameter <monospace>--pacbio-hifi</monospace> and with the option <monospace>--keep-haplotypes</monospace>, hifiasm v0.19 (<xref ref-type="bibr" rid="B6">Cheng et al. 2021</xref>) was run with parameters <monospace>--n-hap 3</monospace> and <monospace>-l</monospace> set to 0 and 3, NextDenovo v2.5 (<xref ref-type="bibr" rid="B31">NextOmics 2019</xref>) with parameters <monospace>genome_size &#x3d; 300m</monospace> and <monospace>read_type &#x3d; hifi</monospace>, and wtdbg2 v2.5 (<xref ref-type="bibr" rid="B42">Ruan and Li 2020</xref>) with parameter <monospace>-x ccs</monospace>. Nanopore reads with a quality higher than Q20 were selected using chopper. Different parameters were tested to adapt to the high accuracy, and the assemblies with the highest contiguity and completeness were selected. Canu v2.2 (<xref ref-type="bibr" rid="B21">Koren et al. 2017</xref>) was run with parameters <monospace>-nanopore -corrected genomeSize&#x3d;300m</monospace>, Flye v2.9 (<xref ref-type="bibr" rid="B20">Kolmogorov et al. 2019</xref>) was run with parameter <monospace>--nano-corr</monospace> and with the option <monospace>--keep-haplotypes</monospace>, NextDenovo v2.5 (<xref ref-type="bibr" rid="B31">NextOmics 2019</xref>) was run with parameters <monospace>genome_size &#x3d; 300m</monospace> and <monospace>read_type &#x3d; hifi</monospace>, and wtdbg2 v2.5 (<xref ref-type="bibr" rid="B42">Ruan and Li 2020</xref>) was run with parameter <monospace>-x ont</monospace>. To combine PacBio HiFi and Nanopore reads, Nanopore reads longer than 30&#xa0;kb were selected using seqtk v1.3 (<xref ref-type="bibr" rid="B26">Li 2012</xref>) with the module <monospace>seqtk seq</monospace> and the parameter <monospace>-L 30000</monospace>. 
hifiasm v0.19 (<xref ref-type="bibr" rid="B6">Cheng et al. 2021</xref>) was run using both datasets with parameters <monospace>--n-hap 3</monospace> and <monospace>-l</monospace> set to 0 and 3. Verkko v1.4 (<xref ref-type="bibr" rid="B39">Rautiainen et al. 2023</xref>) was run with default parameters.</p>
</sec>
<sec id="s2-7">
<title>2.7 Assembly evaluation and post-processing</title>
<p>Assembly statistics were calculated using assembly-stats v1.0.1 (<xref ref-type="bibr" rid="B35">Sanger-Pathogens 2014</xref>). Ortholog completeness was computed using the Benchmarking Universal Single-Copy Orthologs (BUSCO) (<xref ref-type="bibr" rid="B28">Manni et al. 2021</xref>) tool v5.4.7 with parameter <monospace>-m genome</monospace> against the Metazoa odb10 and Nematoda odb10 lineages. PacBio HiFi reads were mapped against HiFi assemblies using minimap2 v2.24 (<xref ref-type="bibr" rid="B27">Li 2018</xref>) with parameters <monospace>-ax map-hifi</monospace> and Nanopore reads were mapped against the Nanopore and hybrid assemblies with parameters <monospace>-ax map-ont</monospace>. Mapped reads were sorted using SAMtools v1.6 with the module <monospace>samtools sort</monospace>. Contigs were aligned against the nt database using BLAST v2.13.0 (<xref ref-type="bibr" rid="B3">Altschul et al. 1990</xref>). The outputs were provided as input to BlobToolKit v4.1.5 (<xref ref-type="bibr" rid="B5">Challis et al. 2020</xref>), and contaminants identified as Proteobacteria, Actinobacteria, Actinomycetota and Bacteroidetes were subsequently removed; bacterial DNA is expected as these nematodes feed on bacteria. Reads were mapped again using minimap2 v2.24 and the output was provided to purge_dups v1.2.5 (<xref ref-type="bibr" rid="B14">Guan et al. 2020</xref>) to remove uncollapsed haplotypes. PacBio HiFi reads were used to purge HiFi-based assemblies, Nanopore reads for Nanopore-based assemblies, Nanopore reads for hybrid assemblies of <italic>Panagrolaimus</italic> sp. PS1159, and PacBio HiFi reads for hybrid assemblies of <italic>Romanomermis culicivorax</italic> (due to the low coverage of Nanopore reads).</p>
</sec>
<sec id="s2-8">
<title>2.8 Final scaffolding</title>
<p>
<italic>Romanomermis culicivorax</italic> was assembled following two pipelines: 1) the decontaminated NextDenovo PacBio HiFi contigs were purged once using purge_dups; 2) the decontaminated hifiasm PacBio HiFi &#x2b; Nanopore contigs were purged twice; assembly 1) was then scaffolded using RagTag v2.1.0 (<xref ref-type="bibr" rid="B2">Alonge et al. 2022</xref>) with assembly 2) as reference. <italic>Panagrolaimus</italic> sp. PS1159 was also assembled using two pipelines: 1) the decontaminated hifiasm <monospace>-l 3</monospace> PacBio HiFi &#x2b; Nanopore contigs were purged twice using purge_dups; 2) the decontaminated Flye <monospace>--keep-haplotypes</monospace> Nanopore contigs were purged twice; assembly 1) was then scaffolded using RagTag v2.1.0 with assembly 2) as reference. The decontaminated hifiasm <monospace>-l 0</monospace> PacBio HiFi &#x2b; Nanopore contigs were retained as a phased assembly.</p>
</sec>
<sec id="s2-9">
<title>2.9 Repeat and gene annotation</title>
<p>Repeats were annotated using the Extensive <italic>De novo</italic> TE Annotator (EDTA) pipeline v2.0.1 (<xref ref-type="bibr" rid="B33">Ou et al. 2019</xref>) with parameters <monospace>--sensitive 1 --anno 1</monospace>. This pipeline filters and combines predictions from LTRharvest (<xref ref-type="bibr" rid="B13">Gremme et al. 2013</xref>), LTR_FINDER (<xref ref-type="bibr" rid="B53">Xu and Wang 2007</xref>), LTR_retriever (<xref ref-type="bibr" rid="B32">Ou and Jiang 2018</xref>), HelitronScanner (<xref ref-type="bibr" rid="B52">Xiong et al. 2014</xref>), Generic Repeat Finder (<xref ref-type="bibr" rid="B49">Shi and Liang 2019</xref>) and TIR-learner (<xref ref-type="bibr" rid="B50">Su et al. 2019</xref>), and produces a final transposable element library using RepeatModeler (<xref ref-type="bibr" rid="B10">Flynn et al. 2020</xref>). The output hardmasked assembly was converted into a softmasked assembly. RNA-seq reads were trimmed using Trim Galore v0.6.10 and mapped to the assemblies using hisat2 v2.2.1 (<xref ref-type="bibr" rid="B18">Kim et al. 2019</xref>). After sorting using SAMtools v1.6 (<xref ref-type="bibr" rid="B8">Danecek et al. 2021</xref>), the mapped reads were provided as input to BRAKER v3.0.3 (<xref ref-type="bibr" rid="B11">Gabriel et al. 2023</xref>) with parameters <monospace>--gff3 --UTR off</monospace>.</p>
</sec>
<sec id="s2-10">
<title>2.10 Downstream analyses</title>
<p>BUSCO v5.4.7 (<xref ref-type="bibr" rid="B28">Manni et al. 2021</xref>) was run on the annotated protein-coding genes using the option <monospace>-m proteins</monospace> against the Metazoa odb10 and Nematoda odb10 lineages. <italic>k</italic>-mer completeness of the assemblies was assessed based on the PacBio HiFi dataset using Merqury v1.3 (<xref ref-type="bibr" rid="B40">Rhie et al. 2020</xref>).</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Initial long-read analyses</title>
<p>PacBio HiFi sequencing resulted in 37.5&#xa0;Gb of reads (N50: 12.6&#xa0;kb) for <italic>Romanomermis culicivorax</italic> and 29.2&#xa0;Gb (N50: 15.8&#xa0;kb) for <italic>Panagrolaimus</italic> sp. PS1159 (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>). Nanopore sequencing yielded 5.7&#xa0;Gb (N50: 15.9&#xa0;kb) for <italic>R. culicivorax</italic> and 10.7&#xa0;Gb (N50: 33.4&#xa0;kb) for <italic>P.</italic> sp. PS1159 (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). While PacBio HiFi reads have a higher quality, Nanopore reads reach longer lengths, including some reads of 100&#x2b; kb (<xref ref-type="fig" rid="F1">Figure 1A</xref>). Ploidy analysis using Smudgeplot predicts that <italic>R. culicivorax</italic> has a diploid genome, while <italic>P.</italic> sp. PS1159 is expected to be triploid (<xref ref-type="fig" rid="F1">Figure 1B</xref>). Nanopore reads with Q20&#x2b; quality were selected for initial assembly of <italic>P.</italic> sp. PS1159, but no quality threshold was applied for <italic>R. culicivorax</italic> Nanopore reads due to their limited amount. All PacBio HiFi reads were used for initial assemblies.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Initial analyses of the long reads. <bold>(A)</bold> Quality and length of Nanopore and PacBio HiFi reads. <bold>(B)</bold> <italic>k</italic>-mer analysis of the ploidy of the genomes using PacBio HiFi reads.</p>
</caption>
<graphic xlink:href="fgene-15-1308527-g001.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>3.2 High-quality long-read assemblies</title>
<p>Depending on the program used, assemblies of PacBio HiFi and Nanopore reads yielded contigs with variable contiguity and cumulative size (<xref ref-type="fig" rid="F2">Figure 2</xref>). For <italic>Romanomermis culicivorax</italic>, some PacBio HiFi assemblies had a size moderately above the Illumina assembly size of 322.8&#xa0;Mb (467.2&#xa0;Mb for NextDenovo, 404.8&#xa0;Mb for wtdbg2), but hifiasm and Flye produced oversized assemblies (969.3&#xa0;Mb and 1.1&#xa0;Gb respectively). These large genome sizes could not be explained by bacterial contamination in the data coming from their environment, as there was almost none in HiFi assemblies (<xref ref-type="sec" rid="s10">Supplementary Figure S1</xref>). Nanopore assemblies were smaller: Flye and wtdbg2 assembly sizes were above the Illumina assembly size (499.3&#xa0;Mb and 398.2&#xa0;Mb), and Canu and NextDenovo assemblies were much shorter (114.9&#xa0;Mb and 101.7&#xa0;Mb). This is likely due to the low coverage of the Nanopore dataset, which was aggravated by a high amount of contamination from Proteobacteria and Bacteroidetes (<xref ref-type="sec" rid="s10">Supplementary Figure S2</xref>), and led to a suboptimal sequencing coverage for these assemblers. Therefore, it is expected for Flye and wtdbg2 to yield the highest-quality assemblies as they have been shown to be more robust with low-coverage datasets (<xref ref-type="bibr" rid="B15">Guiglielmoni et al. 2021</xref>). The hybrid assembly obtained using hifiasm is oversized (1.1&#xa0;Gb), similar to the PacBio-HiFi-only hifiasm assembly. N50s ranged from 108&#xa0;kb (wtdbg2, Nanopore) to 550&#xa0;kb (hifiasm, hybrid); although these values do not reach the megabase level, they are still one order of magnitude larger than for the Illumina assembly (17.6&#xa0;kb).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Draft assembly statistics of PacBio HiFi reads (red), Nanopore reads (blue) and the two combined (purple) with assembly size (in Mb), N50 (in kb) and BUSCO completeness against the Metazoa and Nematoda lineages.</p>
</caption>
<graphic xlink:href="fgene-15-1308527-g002.tif"/>
</fig>
<p>For <italic>Panagrolaimus</italic> sp. PS1159, assemblies ranged from 128.4&#xa0;Mb (wtdbg2, PacBio HiFi) to 473.9&#xa0;Mb (Flye, Nanopore). Shorter assemblies correlated with a low number of duplicated BUSCO orthologs, suggesting that they would be collapsed assemblies, in which homologous chromosomes are represented by one sequence. Larger assemblies have a high number of duplicated BUSCO orthologs, indicating that haplotypes are separated. These values would match the expectation of a phased assembly with a size three times larger than a collapsed assembly, for a triploid genome. These draft assemblies were overall more contiguous than for <italic>R. culicivorax</italic>, with a minimum of 240&#xa0;kb (wtdbg2, Nanopore) and a maximum of 1.1&#xa0;Mb (Flye, Nanopore). In addition, Nanopore assemblies had fewer bacterial contaminants than PacBio HiFi assemblies (<xref ref-type="sec" rid="s10">Supplementary Figures S3, S4</xref>), likely owed to the supplementary sucrose decontamination step during library preparation. These read sets overall suffered much less from bacterial contamination than the Illumina data used in <xref ref-type="bibr" rid="B43">Schiffer et al. (2019)</xref>.</p>
<p>After decontamination, haplotig purging and scaffolding, high-quality assemblies were obtained for both species. Although long reads were not sufficient to reach chromosome level, the final assemblies had an N50 over 1&#xa0;Mb (1.1&#xa0;Mb for <italic>R. culicivorax</italic> and 3.1&#xa0;Mb for <italic>P.</italic> sp. PS1159) and their contiguity is drastically improved compared to Illumina assemblies (<xref ref-type="table" rid="T1">Table 1</xref>). Furthermore, their BUSCO scores against the Metazoa and Nematoda lineages were also improved. Interestingly, the nematode BUSCO score of <italic>R. culicivorax</italic> remained low (39.4%), despite a higher metazoan BUSCO score. This suggests that the genome could be lacking many orthologs that would be expected in nematodes. The assembly of <italic>R. culicivorax</italic> has a QV score of 54.97; the <italic>k</italic>-mer spectrum shows a mostly collapsed assembly with some remaining artefactual duplications (<xref ref-type="sec" rid="s10">Supplementary Figure S5</xref>). The assembly of <italic>P.</italic> sp. PS1159 has a QV score of 47.73 and the <italic>k</italic>-mer spectrum also supports a mostly collapsed assembly with limited artefactual duplications (<xref ref-type="sec" rid="s10">Supplementary Figure S6</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Assembly statistics of previous (v1) and new (v2) versions of <italic>Romanomermis culicivorax</italic> and <italic>Panagrolaimus</italic> sp. PS1159.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left"/>
<th align="center">
<italic>Romanomermis culicivorax</italic> v1
</th>
<th align="center">
<italic>Romanomermis</italic> <italic>culicivorax</italic> v2</th>
<th align="center">
<italic>Panagrolaimus</italic> sp. PS1159 v1</th>
<th align="center">
<italic>Panagrolaimus</italic> sp. PS1159 v2</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Assembly size</td>
<td align="center">322.8&#xa0;Mb</td>
<td align="center">359.1&#xa0;Mb</td>
<td align="center">85.0&#xa0;Mb</td>
<td align="center">101.2&#xa0;Mb</td>
</tr>
<tr>
<td align="left">Number of scaffolds</td>
<td align="center">62,537</td>
<td align="center">595</td>
<td align="center">17,628</td>
<td align="center">67</td>
</tr>
<tr>
<td align="left">N50</td>
<td align="center">17.6&#xa0;kb</td>
<td align="center">1.0&#xa0;Mb</td>
<td align="center">9.9&#xa0;kb</td>
<td align="center">3.1&#xa0;Mb</td>
</tr>
<tr>
<td align="left">L50</td>
<td align="center">4,624</td>
<td align="center">114</td>
<td align="center">2,232</td>
<td align="center">11</td>
</tr>
<tr>
<td align="left">N90</td>
<td align="center">2.2&#xa0;kb</td>
<td align="center">315.0&#xa0;kb</td>
<td align="center">2.0&#xa0;kb</td>
<td align="center">1.1&#xa0;Mb</td>
</tr>
<tr>
<td align="left">L90</td>
<td align="center">26,088</td>
<td align="center">344</td>
<td align="center">9,419</td>
<td align="center">32</td>
</tr>
<tr>
<td align="left">Number of gaps</td>
<td align="center">303,605</td>
<td align="center">716</td>
<td align="center">49,960</td>
<td align="center">140</td>
</tr>
<tr>
<td align="left">Number of Ns</td>
<td align="center">55.1&#xa0;Mb</td>
<td align="center">70.6&#xa0;kb</td>
<td align="center">1.7&#xa0;Mb</td>
<td align="center">12.7&#xa0;kb</td>
</tr>
<tr>
<td align="left">BUSCO score (Metazoa)</td>
<td align="center">66.7%</td>
<td align="center">68.4%</td>
<td align="center">60.2%</td>
<td align="center">65.9%</td>
</tr>
<tr>
<td align="left">Single-copy orthologs</td>
<td align="center">66.5%</td>
<td align="center">65.2%</td>
<td align="center">57.0%</td>
<td align="center">57.2%</td>
</tr>
<tr>
<td align="left">Duplicated orthologs</td>
<td align="center">0.2%</td>
<td align="center">3.2%</td>
<td align="center">3.2%</td>
<td align="center">8.7%</td>
</tr>
<tr>
<td align="left">Fragmented orthologs</td>
<td align="center">14.7%</td>
<td align="center">9.2%</td>
<td align="center">10.4%</td>
<td align="center">7.4%</td>
</tr>
<tr>
<td align="left">BUSCO score (Nematoda)</td>
<td align="center">35.2%</td>
<td align="center">39.4%</td>
<td align="center">59.7%</td>
<td align="center">66.6%</td>
</tr>
<tr>
<td align="left">Single-copy orthologs</td>
<td align="center">34.0%</td>
<td align="center">37.3%</td>
<td align="center">57.1%</td>
<td align="center">58.7%</td>
</tr>
<tr>
<td align="left">Duplicated orthologs</td>
<td align="center">1.2%</td>
<td align="center">2.1%</td>
<td align="center">2.6%</td>
<td align="center">7.9%</td>
</tr>
<tr>
<td align="left">Fragmented orthologs</td>
<td align="center">4.4%</td>
<td align="center">4.1%</td>
<td align="center">4.8%</td>
<td align="center">4.4%</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-3">
<title>3.3 Repeat and gene annotation</title>
<p>Repeats were better resolved in the new long-read assemblies than in the originally published ones (<xref ref-type="fig" rid="F3">Figure 3</xref>). Repeats account for 68.2% of the assembly of <italic>R. culicivorax</italic>, bringing it closer to the repetitive content of <italic>Mermis nigrescens</italic>. The assembly of <italic>P.</italic> sp. PS1159 only has 16.0%, which is also higher than the 7.2% of repeats in the Illumina assembly. Many transposable elements (TE) were recovered in these improved assemblies that were undetected in Illumina assemblies. Notably, more long terminal repeats (LTR) were identified in <italic>R. culicivorax</italic>, the number of terminal inverted repeats (TIR) was greatly increased, and 6.5&#xa0;Mb of polintons were uncovered while they were almost absent in the Illumina assembly (<xref ref-type="sec" rid="s10">Supplementary Table S3</xref>). The load of transposable elements is much lower in <italic>P.</italic> sp. PS1159, but the new assembly still has a wider variety of LTRs, TIRs, helitrons and other elements than the Illumina assembly. Gene prediction resulted in 16,689 annotated genes for <italic>R. culicivorax</italic>, with overall BUSCO scores of 77.6% (Metazoa) and 56.2% (Nematoda), and 27,203 annotated genes for <italic>P.</italic> sp. PS1159 with overall BUSCO scores of 77.3% (Metazoa) and 78.6% (Nematoda). As expected, these annotations are more complete than the ones published with the previous Illumina assemblies.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Comparison of assemblies based on TE (top) count and BUSCO ortholog (bottom) statistics for the protein annotations (against the Metazoa and Nematoda lineages) shows higher repeat and gene completeness of the new assemblies.</p>
</caption>
<graphic xlink:href="fgene-15-1308527-g003.tif"/>
</fig>
</sec>
<sec id="s3-4">
<title>3.4 Orthology analyses of the phased assembly of <italic>Panagrolaimus</italic> sp. PS1159</title>
<p>To re-analyse the <italic>Panagrolaimus</italic> sp. PS1159 in regard to being a triploid genome, we selected the hybrid hifiasm assembly as a phased candidate. After decontamination, this assembly has a size of 264.7 Mb, 876 contigs and an N50 of 559&#xa0;kb. The assembly&#x2019;s BUSCO scores have high numbers of duplicated orthologs: 1.8% single-copy orthologs and 65.3% duplicated orthologs against Metazoa; 2.1% single-copy orthologs and 66.3% duplicated orthologs against Nematoda. The <italic>k</italic>-mer spectrum shows that the assembly has <italic>k</italic>-mers represented once, twice, or in three copies in the three different peaks at 50X, 100X and 150X (<xref ref-type="sec" rid="s10">Supplementary Figure S7</xref>), which is expected for a phased triploid genome assembly. In addition, the QV score reaches 48.18. Annotation resulted in 70,448 predicted genes, with BUSCO scores of 78.1% (77.4% duplicated) against Metazoa and 79.7% (78.7% duplicated) against Nematoda. We analyzed the number of ortholog copies from the annotated genes in the collapsed and phased assemblies (<xref ref-type="fig" rid="F4">Figure 4</xref>), considering that orthologs used by BUSCO are expected as single copy. For the collapsed assembly, most orthologs are in only one copy. In the phased assembly, the majority of orthologs are in three copies, as there would be one copy for each haplotype. This brings further support to the triploidy of <italic>P.</italic> sp. PS1159.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Ortholog analysis of <italic>Panagrolaimus</italic> sp. PS1159 collapsed and phased assemblies supports triploidy. The histograms represent the number of orthologs with their copy number from the lineages Metazoa and Nematoda identified in the protein-coding genes annotated for <italic>P.</italic> sp. PS1159. In the collapsed assembly, the majority of orthologs are present in a single copy, while most orthologs are in three copies in the phased assembly.</p>
</caption>
<graphic xlink:href="fgene-15-1308527-g004.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>Our new long-read assemblies for <italic>Romanomermis culicivorax</italic> and <italic>Panagrolaimus</italic> sp. PS1159 provide a drastic improvement to the previously published short-read-based assemblies, with higher contiguity, improved repeat resolution, and more accurate gene annotation. Furthermore, we generated a draft phased assembly of <italic>Panagrolaimus</italic> sp. PS1159, which opens new possibilities for haplotype-specific analyses. With the addition of long-range sequencing data, such as chromosome conformation capture, we can expect to scaffold these high-quality assemblies into chromosome candidates and further investigate genome structures.</p>
<p>The first challenge consisted in generating PacBio HiFi and Nanopore sequencing data for these two non-model species. The resulting reads clearly highlight the strengths of these technologies: while Nanopore reads provide an advantage on length, PacBio HiFi reads have the highest accuracy. It should be noted however that the overall accuracy of Nanopore reads has increased compared to data from R9.4.1 flowcells (<xref ref-type="bibr" rid="B15">Guiglielmoni et al. 2021</xref>) and that these reads were sufficient to produce assemblies with a high BUSCO completeness. Ultra-low input PacBio HiFi sequencing resulted in large datasets (over 29&#xa0;Gb) despite the use of only a few individuals, and also led to high-quality draft assemblies. This amplification-based approach can be favored when the DNA availability for a species is limited, for instance for nematodes which cannot be cultured. It should be considered however that amplification protocols can lead to a bias in the sequencing data. To better understand the impact of amplification bias on assemblies, additional PacBio HiFi reads without amplification from a large pool of individuals could be generated in future experiments.</p>
<p>Most initial assemblies improved on the published Illumina assemblies of the two species. The oversized PacBio HiFi assemblies of <italic>R. culicivorax</italic> could be attributed to the use of several individuals combined with the high accuracy of PacBio HiFi reads, leading to the separation of multiple haplotypes in heterozygous regions. Based on the quality of the assemblies obtained from the ultra-low input PacBio HiFi reads, we can expect that further improvements would enable the generation of data from a single individual, which would prevent issues introduced by alternative haplotypes and could additionally be used to generate a phased assembly. Nanopore assemblies did not have similar large sizes which may be owed to the lower accuracy of Nanopore reads which did not discriminate alternative haplotypes. For <italic>P.</italic> sp. PS1159, the Nanopore dataset was large enough to select for the more accurate Q20&#x2b; reads; therefore, haplotypes could be separated in both PacBio HiFi and Nanopore assemblies. In fact, almost all draft assemblies had the three haplotypes mostly separated with sizes close to 300&#xa0;Mb (which would be the expected phased assembly size) and most BUSCO orthologs in multiple copies. Regarding contiguity, <italic>R. culicivorax</italic> assemblies were generally less contiguous than <italic>P.</italic> sp. PS1159 assemblies, which might be attributed to the higher repetitive content of this genome.</p>
<p>The most striking improvement in these assemblies lies in the resolution of repetitive regions. For both species, the percentage of repetitions in the genomes increased and revealed a wider variety of transposable elements. The comparison highlights that these transposable elements were in fact almost absent in the assembly of <italic>P.</italic> sp. PS1159 and very partially recovered in the assembly of <italic>R. culicivorax</italic>. Considering that TEs represent 289&#xa0;Mb of the 359-Mb genome, we can estimate that a large portion of this genome was completely overlooked in the past. A recent study has shown that genome assemblies from basal nematodes contain more repeats (ranging from 23.4% up to 50.6% repeats) than nematodes belonging to other clades (ranging from 0.8% to 31%) (<xref ref-type="bibr" rid="B24">Lee et al. 2023</xref>). The results presented here are consistent with the previous findings as <italic>R. culicivorax</italic>, a basal nematode, showed a high repeat and TE content and the derived <italic>P.</italic> sp. PS1159 has a low repeat and TE content. These variations and the better resolution of repetitions in long-read assemblies should prompt further investigation into TE contents through nematode evolution.</p>
<p>The numbers of annotated genes for versions 1 and 2 of <italic>P.</italic> sp. PS1159 are similar (26,760 genes vs. 27,203), yet this number shrank for <italic>R. culicivorax</italic>: while the first assembly had 48,376 annotated genes, the long-read assembly has 16,689. This did not lead to a decrease in ortholog completeness as the BUSCO scores of the new assemblies and annotations both reached higher values. Interestingly, the score of <italic>R. culicivorax</italic> against the Metazoa lineage is slightly higher than that of <italic>P.</italic> sp. PS1159, but its score against the Nematoda lineage is low with a value of only 39.4%. As a matter of fact, the Nematoda dataset is composed of seven nematode species, out of which only one is a basal nematode (<italic>Trichinella spiralis</italic>). The lack of representation of early branching nematodes could explain the lower BUSCO completeness on basal nematode genomes when compared to representatives of higher clades like <italic>P.</italic> sp. PS1159, and illustrates the bias of current genomics resources. Early branching nematode genomes are scarce: even at the subclass level, genome assemblies are available on GenBank for only 15 Dorylaimia species and four Enoplia species (accessed on 06.10.2023). Therefore this study brings crucial resources to guide future sequencing projects for understudied nematodes and to fill the gaps among available assemblies.</p>
<p>The use of high-accuracy long reads permitted the generation of a first draft phased assembly of <italic>P.</italic> sp. PS1159. This assembly, combined with <italic>k</italic>-mer predictions based on PacBio HiFi reads and the analysis of <xref ref-type="bibr" rid="B43">Schiffer et al. (2019)</xref>, confirms that this species has a triploid genome. Considering the potential hybridization which could have introduced this third copy, a haplotype-resolved assembly is especially warranted to identify the original and newly acquired alleles. These analyses demonstrate the feasibility of long-read collapsed and phased assemblies for challenging genomes of understudied nematode species, including in the context of high repetitiveness and polyploidy. We gained new insights into these genomes regarding their gene and repeat content, which pave the way for more in-depth comparative genomics.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ena">https://www.ebi.ac.uk/ena</ext-link>, PRJEB66727.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>NG: Conceptualization, Data curation, Formal Analysis, Funding acquisition, Investigation, Methodology, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. LV: Data curation, Formal Analysis, Investigation, Methodology, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. JK: Data curation, Formal Analysis, Investigation, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. PS: Conceptualization, Funding acquisition, Supervision, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This project was supported through a DFG Emmy Noether Program (ENP) Projekt (434028868) and the DFG funded project B08 in the CRC1211 (268236062) to PHS. NG&#x2019;s position was first funded through a Deutsche Forschungsgemeinschaft (DFG) grant (458953049) to PHS and subsequently through the European Union&#x2019;s Horizon Europe research and innovation programme under the Marie Sk&#x142;odowska-Curie grant agreement No. 101110569.</p>
</sec>
<ack>
<p>We thank Christopher Kraus for his contribution to RNA sequencing and the Cologne Center for Genomics and the Genomics and Transcriptomics Laboratory for generation of sequencing data.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2024.1308527/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2024.1308527/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abagli</surname>
<given-names>A. Z.</given-names>
</name>
<name>
<surname>Alavo</surname>
<given-names>T. B.</given-names>
</name>
<name>
<surname>Perez-Pacheco</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Platzer</surname>
<given-names>E. G.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Efficacy of the mermithid nematode, <italic>Romanomermis iyengari</italic>, for the biocontrol of <italic>Anopheles gambiae</italic>, the major malaria vector in sub-saharan africa</article-title>. <source>Parasites &#x26; Vectors</source> <volume>12</volume>, <fpage>253</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1186/s13071-019-3508-6</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alonge</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lebeigle</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kirsche</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jenike</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ou</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Aganezov</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Automated assembly scaffolding using RagTag elevates a new tomato system for high-throughput genome editing</article-title>. <source>Genome Biology</source> <volume>23</volume>, <fpage>258</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1186/s13059-022-02823-7</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Altschul</surname>
<given-names>S. F.</given-names>
</name>
<name>
<surname>Gish</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Myers</surname>
<given-names>E. W.</given-names>
</name>
<name>
<surname>Lipman</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>1990</year>). <article-title>Basic local alignment search tool</article-title>. <source>Journal of Molecular Biology</source> <volume>215</volume>, <fpage>403</fpage>&#x2013;<lpage>410</lpage>. <pub-id pub-id-type="doi">10.1016/S0022-2836(05)80360-2</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhattarai</surname>
<given-names>U. R.</given-names>
</name>
<name>
<surname>Poulin</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gemmell</surname>
<given-names>N. J.</given-names>
</name>
<name>
<surname>Dowle</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Genome assembly and annotation of the mermithid nematode <italic>Mermis nigrescens</italic>
</article-title>. <comment>bioRxiv</comment>. <pub-id pub-id-type="doi">10.1101/2022.11.05.515230</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Challis</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Richards</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Rajan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cochrane</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Blaxter</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Blobtoolkit&#x2013;interactive quality assessment of genome assemblies</article-title>. <source>G3: Genes, Genomes, Genetics</source> <volume>10</volume>, <fpage>1361</fpage>&#x2013;<lpage>1374</lpage>. <pub-id pub-id-type="doi">10.1534/g3.119.400908</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Concepcion</surname>
<given-names>G. T.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Haplotype-resolved <italic>de novo</italic> assembly using phased assembly graphs with hifiasm</article-title>. <source>Nature Methods</source> <volume>18</volume>, <fpage>170</fpage>&#x2013;<lpage>175</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-020-01056-5</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chomczynski</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Sacchi</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>1987</year>). <article-title>Single-step method of RNA isolation by acid guanidinium thiocyanate-phenol-chloroform extraction</article-title>. <source>Analytical Biochemistry</source> <volume>162</volume>, <fpage>156</fpage>&#x2013;<lpage>159</lpage>. <pub-id pub-id-type="doi">10.1006/abio.1987.9999</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Danecek</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bonfield</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Liddle</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Marshall</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ohan</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Pollard</surname>
<given-names>M. O.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Twelve years of SAMtools and BCFtools</article-title>. <source>GigaScience</source> <volume>10</volume>, <fpage>Giab008</fpage>. <pub-id pub-id-type="doi">10.1093/gigascience/giab008</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Coster</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Rademakers</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>NanoPack2: population-scale evaluation of long-read sequencing data</article-title>. <source>Bioinformatics</source> <volume>39</volume>, <fpage>btad311</fpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btad311</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Flynn</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Hubley</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Goubert</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rosen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Clark</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Feschotte</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>RepeatModeler2 for automated genomic discovery of transposable element families</article-title>. <source>Proceedings of the National Academy of Sciences</source> <volume>117</volume>, <fpage>9451</fpage>&#x2013;<lpage>9457</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1921046117</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gabriel</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Bruna</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Hoff</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>Ebel</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lomsadze</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Borodovsky</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>BRAKER3: Fully Automated Genome Annotation Using RNA-Seq and Protein Evidence with GeneMark-ETP, AUGUSTUS and TSEBRA</article-title>. <comment>bioRxiv</comment>. <pub-id pub-id-type="doi">10.1101/2023.06.10.544449</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giblin</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Platzer</surname>
<given-names>E. G.</given-names>
</name>
</person-group> (<year>1985</year>). <article-title>
<italic>Romanomermis culicivorax</italic> parasitism and the development, growth, and feeding rates of two mosquito species</article-title>. <source>Journal of Invertebrate Pathology</source> <volume>46</volume>, <fpage>11</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2011(85)90124-7</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gremme</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Steinbiss</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kurtz</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>GenomeTools: a comprehensive software library for efficient processing of structured genome annotations</article-title>. <source>IEEE/ACM Transactions on Computational Biology and Bioinformatics</source> <volume>10</volume>, <fpage>645</fpage>&#x2013;<lpage>656</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2013.68</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>McCarthy</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Wood</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Howe</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Durbin</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identifying and removing haplotypic duplication in primary genome assemblies</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>2896</fpage>&#x2013;<lpage>2898</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa025</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guiglielmoni</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Houtain</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Derzelle</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Van Doninck</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Flot</surname>
<given-names>J.-F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Overcoming uncollapsed haplotypes in long-read assemblies of non-model organisms</article-title>. <source>BMC Bioinformatics</source> <volume>22</volume>, <fpage>303</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-021-04118-3</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guiglielmoni</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Rivera-Vic&#xe9;ns</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Koszul</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Flot</surname>
<given-names>J.-F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A deep dive into genome assemblies of non-vertebrate animals</article-title>. <source>Peer Community Journal</source> <volume>2</volume>, <fpage>e29</fpage>. <pub-id pub-id-type="doi">10.24072/pcjournal.128</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hodda</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Phylum Nematoda: trends in species descriptions, the documentation of diversity, systematics, and the species concept</article-title>. <source>Zootaxa</source> <volume>5114</volume>, <fpage>290</fpage>&#x2013;<lpage>317</lpage>. <pub-id pub-id-type="doi">10.11646/zootaxa.5114.1.2</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Paggi</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Bennett</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Salzberg</surname>
<given-names>S. L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype</article-title>. <source>Nature Biotechnology</source> <volume>37</volume>, <fpage>907</fpage>&#x2013;<lpage>915</lpage>. <pub-id pub-id-type="doi">10.1038/s41587-019-0201-4</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koetsier</surname>
<given-names>P. A. G.</given-names>
</name>
<name>
<surname>Cantor</surname>
<given-names>E. J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A simple approach for effective shearing and reliable concentration measurement of ultra-high-molecular-weight DNA</article-title>. <source>BioTechniques</source> <volume>71</volume>, <fpage>439</fpage>&#x2013;<lpage>444</lpage>. <pub-id pub-id-type="doi">10.2144/btn-2021-0051</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kolmogorov</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Pevzner</surname>
<given-names>P. A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Assembly of long, error-prone reads using repeat graphs</article-title>. <source>Nature Biotechnology</source> <volume>37</volume>, <fpage>540</fpage>&#x2013;<lpage>546</lpage>. <pub-id pub-id-type="doi">10.1038/s41587-019-0072-8</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koren</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Walenz</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Berlin</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Bergman</surname>
<given-names>N. H.</given-names>
</name>
<name>
<surname>Phillippy</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Canu: scalable and accurate long-read assembly via adaptive <italic>k</italic>-mer weighting and repeat separation</article-title>. <source>Genome Research</source> <volume>27</volume>, <fpage>722</fpage>&#x2013;<lpage>736</lpage>. <pub-id pub-id-type="doi">10.1101/gr.215087.116</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Koutsovoulos</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kaur</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Blaxter</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2012a</year>). <article-title>Toward 959 nematode genomes</article-title>. <source>Worm</source> <volume>1</volume>, <fpage>42</fpage>&#x2013;<lpage>50</lpage>. <pub-id pub-id-type="doi">10.4161/worm.19046</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schiffer</surname>
<given-names>P. H.</given-names>
</name>
<name>
<surname>Blaxter</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2012b</year>). <article-title>959 nematode genomes: a semantic wiki for coordinating sequencing projects</article-title>. <source>Nucleic Acids Research</source> <volume>40</volume>, <fpage>D1295</fpage>&#x2013;<lpage>D1300</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkr826</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Ke</surname>
<given-names>H.-M.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>H.-H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.-C.</given-names>
</name>
<name>
<surname>Tseng</surname>
<given-names>Y.-C.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Single-worm long-read sequencing reveals genome diversity in free-living nematodes</article-title>. <source>Nucleic Acids Research</source> <volume>51</volume>, <fpage>8035</fpage>&#x2013;<lpage>8047</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkad647</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lewis</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Dyal</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Hilburn</surname>
<given-names>C. F.</given-names>
</name>
<name>
<surname>Weitz</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liau</surname>
<given-names>W.-S.</given-names>
</name>
<name>
<surname>LaMunyon</surname>
<given-names>C. W.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Molecular evolution in <italic>Panagrolaimus</italic> nematodes: origins of parthenogenesis, hermaphroditism and the Antarctic species <italic>P. davidi</italic>
</article-title>. <source>BMC Evolutionary Biology</source> <volume>9</volume>, <fpage>15</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2148-9-15</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2012</year>). <source>Seqtk</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/lh3/seqtk">https://github.com/lh3/seqtk</ext-link>.</comment>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Minimap2: pairwise alignment for nucleotide sequences</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>3094</fpage>&#x2013;<lpage>3100</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty191</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manni</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Berkeley</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Seppey</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sim&#xe3;o</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Zdobnov</surname>
<given-names>E. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>BUSCO update: novel and streamlined workflows along with broader and deeper phylogenetic coverage for scoring of eukaryotic, prokaryotic, and viral genomes</article-title>. <source>Molecular Biology and Evolution</source> <volume>38</volume>, <fpage>4647</fpage>&#x2013;<lpage>4654</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msab199</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McGill</surname>
<given-names>L. M.</given-names>
</name>
<name>
<surname>Shannon</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Pisani</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>F&#xe9;lix</surname>
<given-names>M.-A.</given-names>
</name>
<name>
<surname>Raml&#xf8;v</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Dix</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Anhydrobiosis and freezing-tolerance: Adaptations that facilitate the establishment of <italic>Panagrolaimus</italic> nematodes in polar habitats</article-title>. <source>PLOS ONE</source> <volume>10</volume>, <fpage>e0116084</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0116084</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mitreva</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jasmer</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Zarlenga</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Abubucker</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>The draft genome of the parasitic nematode <italic>Trichinella spiralis</italic>
</article-title>. <source>Nature Genetics</source> <volume>43</volume>, <fpage>228</fpage>&#x2013;<lpage>235</lpage>. <pub-id pub-id-type="doi">10.1038/ng.769</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="book">
<collab>NextOmics</collab> (<year>2019</year>). <source>NextDenovo</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/Nextomics/NextDenovo">https://github.com/Nextomics/NextDenovo</ext-link>.</comment>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ou</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>LTR_retriever: a highly accurate and sensitive program for identification of long terminal repeat retrotransposons</article-title>. <source>Plant Physiology</source> <volume>176</volume>, <fpage>1410</fpage>&#x2013;<lpage>1422</lpage>. <pub-id pub-id-type="doi">10.1104/pp.17.01310</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ou</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chougule</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Agda</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Hellinga</surname>
<given-names>A. J.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Benchmarking transposable element annotation methods for creation of a streamlined, comprehensive pipeline</article-title>. <source>Genome Biology</source> <volume>20</volume>, <fpage>275</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-019-1905-y</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="book">
<collab>Oxford Nanopore Technologies</collab> (<year>2022</year>). <source>Dorado</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/nanoporetech/dorado">https://github.com/nanoporetech/dorado</ext-link>.</comment>
</citation>
</ref>
<ref id="B35">
<citation citation-type="book">
<collab>Sanger-Pathogens</collab> (<year>2014</year>). <source>Assembly-Stats</source>. <comment>Pathogen Informatics, Wellcome Sanger Institute. Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/sanger-pathogens">https://github.com/sanger-pathogens</ext-link>.</comment>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Petersen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chapman</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Willis</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Fukuda</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>1978</year>). <article-title>Release of <italic>Romanomermis culicivorax</italic> for the control of <italic>Anopheles albimanus</italic> in El Salvador II. Application of the nematode</article-title>. <source>The American Journal of Tropical Medicine and Hygiene</source> <volume>27</volume>, <fpage>1268</fpage>&#x2013;<lpage>1273</lpage>. <pub-id pub-id-type="doi">10.4269/ajtmh.1978.27.1268</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Presswell</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Evans</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Poulin</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jorge</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Morphological and molecular characterization of <italic>Mermis nigrescens</italic> Dujardin, 1842 (Nematoda: Mermithidae) parasitizing the introduced European earwig (Dermaptera: Forficulidae) in New Zealand</article-title>. <source>Journal of Helminthology</source> <volume>89</volume>, <fpage>267</fpage>&#x2013;<lpage>276</lpage>. <pub-id pub-id-type="doi">10.1017/S0022149X14000017</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ranallo-Benavidez</surname>
<given-names>T. R.</given-names>
</name>
<name>
<surname>Jaron</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Schatz</surname>
<given-names>M. C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>GenomeScope 2.0 and Smudgeplot for reference-free profiling of polyploid genomes</article-title>. <source>Nature Communications</source> <volume>11</volume>, <fpage>1432</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-14998-3</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rautiainen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Nurk</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Walenz</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Logsdon</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Porubsky</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Rhie</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Telomere-to-telomere assembly of diploid chromosomes with Verkko</article-title>. <source>Nature Biotechnology</source> <volume>41</volume>, <fpage>1474</fpage>&#x2013;<lpage>1482</lpage>. <pub-id pub-id-type="doi">10.1038/s41587-023-01662-6</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rhie</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Walenz</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Koren</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Phillippy</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Merqury: reference-free quality, completeness, and phasing assessment for genome assemblies</article-title>. <source>Genome Biology</source> <volume>21</volume>, <fpage>245</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-020-02134-9</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rice</surname>
<given-names>E. S.</given-names>
</name>
<name>
<surname>Green</surname>
<given-names>R. E.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>New approaches for genome assembly and scaffolding</article-title>. <source>Annual Review of Animal Biosciences</source> <volume>7</volume>, <fpage>17</fpage>&#x2013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-animal-020518-115344</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ruan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Fast and accurate long-read assembly with wtdbg2</article-title>. <source>Nature Methods</source> <volume>17</volume>, <fpage>155</fpage>&#x2013;<lpage>158</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0669-3</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schiffer</surname>
<given-names>P. H.</given-names>
</name>
<name>
<surname>Danchin</surname>
<given-names>E. G.</given-names>
</name>
<name>
<surname>Burnell</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Creevey</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Dix</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Signatures of the Evolution of Parthenogenesis and Cryptobiosis in the Genomes of Panagrolaimid Nematodes</article-title>. <source>iScience</source> <volume>21</volume>, <fpage>587</fpage>&#x2013;<lpage>602</lpage>. <pub-id pub-id-type="doi">10.1016/j.isci.2019.10.039</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schiffer</surname>
<given-names>P. H.</given-names>
</name>
<name>
<surname>Kroiher</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kraus</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Koutsovoulos</surname>
<given-names>G. D.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Camps</surname>
<given-names>J. I. R.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>The genome of <italic>Romanomermis culicivorax</italic>: revealing fundamental changes in the core developmental genetic toolkit in Nematoda</article-title>. <source>BMC Genomics</source> <volume>14</volume>, <fpage>923</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2164-14-923</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sereika</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kirkegaard</surname>
<given-names>R. H.</given-names>
</name>
<name>
<surname>Karst</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Michaelsen</surname>
<given-names>T. Y.</given-names>
</name>
<name>
<surname>S&#xf8;rensen</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Wollenberg</surname>
<given-names>R. D.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Oxford Nanopore R10.4 long-read sequencing enables near-perfect bacterial genomes from pure cultures and metagenomes without short-read or reference polishing</article-title>. <comment>bioRxiv</comment>. <pub-id pub-id-type="doi">10.1101/2021.10.27.466057</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shahid</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Slotkin</surname>
<given-names>R. K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The current revolution in transposable element biology enabled by long reads</article-title>. <source>Current Opinion in Plant Biology</source> <volume>54</volume>, <fpage>49</fpage>&#x2013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.1016/j.pbi.2019.12.012</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shannon</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Browne</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Boyd</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fitzpatrick</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Burnell</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>The anhydrobiotic potential and molecular phylogenetics of species and strains of <italic>Panagrolaimus</italic> (Nematoda, Panagrolaimidae)</article-title>. <source>Journal of Experimental Biology</source> <volume>208</volume>, <fpage>2433</fpage>&#x2013;<lpage>2445</lpage>. <pub-id pub-id-type="doi">10.1242/jeb.01629</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shatilovich</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gade</surname>
<given-names>V. R.</given-names>
</name>
<name>
<surname>Pippel</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hoffmeyer</surname>
<given-names>T. T.</given-names>
</name>
<name>
<surname>Tchesunov</surname>
<given-names>A. V.</given-names>
</name>
<name>
<surname>Stevens</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>A novel nematode species from the Siberian permafrost shares adaptive mechanisms for cryptobiotic survival with <italic>C. elegans</italic> dauer larva</article-title>. <source>PLOS Genetics</source> <volume>19</volume>, <fpage>e1010798</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1010798</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Generic Repeat Finder: a high-sensitivity tool for genome-wide <italic>de novo</italic> repeat detection</article-title>. <source>Plant Physiology</source> <volume>180</volume>, <fpage>1803</fpage>&#x2013;<lpage>1815</lpage>. <pub-id pub-id-type="doi">10.1104/pp.19.00386</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Peterson</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>TIR-Learner, a new ensemble method for TIR transposable element annotation, provides evidence for abundant new transposable elements in the maize genome</article-title>. <source>Molecular Plant</source> <volume>12</volume>, <fpage>447</fpage>&#x2013;<lpage>460</lpage>. <pub-id pub-id-type="doi">10.1016/j.molp.2019.02.008</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wenger</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Peluso</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Rowell</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Hall</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Concepcion</surname>
<given-names>G. T.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Accurate circular consensus long-read sequencing improves variant detection and assembly of a human genome</article-title>. <source>Nature Biotechnology</source> <volume>37</volume>, <fpage>1155</fpage>&#x2013;<lpage>1162</lpage>. <pub-id pub-id-type="doi">10.1038/s41587-019-0217-9</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dooner</surname>
<given-names>H. K.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>HelitronScanner uncovers a large overlooked cache of Helitron transposons in many plant genomes</article-title>. <source>Proceedings of the National Academy of Sciences</source> <volume>111</volume>, <fpage>10263</fpage>&#x2013;<lpage>10268</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1410068111</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>LTR_FINDER: an efficient tool for the prediction of full-length LTR retrotransposons</article-title>. <source>Nucleic Acids Research</source> <volume>35</volume>, <fpage>W265</fpage>&#x2013;<lpage>W268</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkm286</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>