<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2024.1430443</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Chlomito: a novel tool for precise elimination of organelle genome contamination from nuclear genome assembly</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Song</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Chong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lu</surname>
<given-names>Yanming</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2699287"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shen</surname>
<given-names>Dawei</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jia</surname>
<given-names>Yunxiao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huo</surname>
<given-names>Yixin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/767283"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Piao</surname>
<given-names>Weilan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Jin</surname>
<given-names>Hua</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2140559"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Laboratory of Genetics and Disorders, Key Laboratory of Molecular Medicine and Biotherapy, Aerospace Center Hospital, School of Life Science, Beijing Institute of Technology</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Research Institute for Science and Technology, Beijing Institute of Technology</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Advanced Technology Research Institute, Beijing Institute of Technology</institution>, <addr-line>Jinan</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Pathology, Aerospace Center Hospital</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Ling Li, Mississippi State University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Christos Noutsos, State University of New York at Old Westbury, United States</p>
<p>Elly Poretsky, Agricultural Research Service (USDA), United States</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Hua Jin, <email xlink:href="mailto:huajin@bit.edu.cn">huajin@bit.edu.cn</email>; Weilan Piao, <email xlink:href="mailto:weilanpiao@bit.edu.cn">weilanpiao@bit.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>27</day>
<month>08</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1430443</elocation-id>
<history>
<date date-type="received">
<day>09</day>
<month>05</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>01</day>
<month>08</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Song, Li, Lu, Shen, Jia, Huo, Piao and Jin</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Song, Li, Lu, Shen, Jia, Huo, Piao and Jin</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Accurate reference genomes are fundamental to understanding biological evolution, biodiversity, hereditary phenomena and diseases. However, many assembled nuclear chromosomes are often contaminated by organelle genomes, which will mislead bioinformatic analysis, and genomic and transcriptomic data interpretation.</p>
</sec>
<sec>
<title>Methods</title>
<p>To address this issue, we developed a tool named Chlomito, aiming at precise identification and elimination of organelle genome contamination from nuclear genome assembly. Compared to conventional approaches, Chlomito utilized new metrics, alignment length coverage ratio (ALCR) and sequencing depth ratio (SDR), thereby effectively distinguishing true organelle genome sequences from those transferred into nuclear genomes via horizontal gene transfer (HGT).</p>
</sec>
<sec>
<title>Results</title>
<p>The accuracy of Chlomito was tested using sequencing data from Plum, Mango and <italic>Arabidopsis</italic>. The results confirmed that Chlomito can accurately detect contigs originating from the organelle genomes, and the identified contigs covered most regions of the organelle reference genomes, demonstrating efficiency and precision of Chlomito. Considering user convenience, we further packaged this method into a Docker image, simplifying the data processing workflow.</p>
</sec>
<sec>
<title>Discussion</title>
<p>Overall, Chlomito provides an efficient, accurate and convenient method for identifying and removing contigs derived from organelle genomes in genomic assembly data, contributing to the improvement of genome assembly quality.</p>
</sec>
</abstract>
<kwd-group>
<kwd>mitochondrial genome</kwd>
<kwd>chloroplast genome</kwd>
<kwd>chromosome-level assembly</kwd>
<kwd>organelle identification</kwd>
<kwd>horizontal gene transfer</kwd>
</kwd-group>
<counts>
<fig-count count="7"/>
<table-count count="0"/>
<equation-count count="2"/>
<ref-count count="65"/>
<page-count count="14"/>
<word-count count="6494"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Plant Bioinformatics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>With the widespread application of high-throughput sequencing technology, researchers can rapidly obtain genomes of various species (<xref ref-type="bibr" rid="B40">Rhoads and Au, 2015</xref>; <xref ref-type="bibr" rid="B11">Goodwin et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B16">Jain et&#xa0;al., 2018</xref>). For chromosome-level <italic>de-novo</italic> genome assembly, long reads from third-generation sequencing (TGS) or short reads from second/next-generation sequencing (SGS/NGS) are first assembled into contigs. Then, these contigs are anchored into chromosomes based on Hi-C sequencing, thereby completing nuclear chromosome assembly (<xref ref-type="bibr" rid="B7">Du et&#xa0;al., 2022</xref>). However, during genome assembly, the issue of organelle genome contamination often arises, where the sequences from mitochondrial DNA (mtDNA) and chloroplast DNA (cpDNA) are mistakenly assembled into the nuclear genome. This phenomenon occurs because organelle DNA is co-extracted with nuclear DNA during genomic DNA purification, producing a mixed sequencing data set. Since hundreds of copies of organelle genomes exist within a single cell with much smaller sizes than the nuclear genomes (<xref ref-type="bibr" rid="B37">Pyke, 1999</xref>), they are overrepresented in the sequencing data and frequently mis-assembled into nuclear genomes. Furthermore, organelle genome sequences exhibit significant similarity to certain nuclear genome sequences, particularly organelle genes that have been transferred into nuclear genomes through horizontal gene transfer (HGT) (<xref ref-type="bibr" rid="B32">Martin, 2003</xref>; <xref ref-type="bibr" rid="B49">Timmis et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B57">Wei et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B54">Wang et&#xa0;al., 2024</xref>). 
Thus, it is challenging to accurately distinguish contigs derived from organelle genomes among all assembled contigs, especially those containing sequences transferred from the organelle genomes through HGT. Accurate identification and elimination of organelle genome sequences are essential for minimizing the contamination issue and will enhance the quality of genome assembly.</p>
<p>To identify and remove organelle genome contamination from nuclear genome assembly data, current methods primarily employ two approaches: the experimental removal prior to sequencing and the bioinformatic identification after assembly. For experimental removal, the density gradient centrifugation technique can be utilized to deplete organelle DNA during nuclear genomic DNA extraction, hence the contamination is reduced in downstream sequencing data (<xref ref-type="bibr" rid="B29">Lutz et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B46">Sikorskaite et&#xa0;al., 2013</xref>; <xref ref-type="bibr" rid="B42">Sandhya et&#xa0;al., 2020</xref>). However, the density gradient centrifugation is not easy to carry out, and requires a large amount of material for DNA extraction but generates low DNA yield. Thus, most reported <italic>de-novo</italic> genome assembly did not include this step. Also, experimental separation is often incomplete. Alternatively, for bioinformatic identification, the most widely adopted approach is to align assembled contigs to organelle reference genomes, followed by filtering based on alignment lengths (<xref ref-type="bibr" rid="B14">Howe et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B33">Mishra et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B39">Rhie et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B60">Zhang et&#xa0;al., 2024</xref>) or sequence similarity (<xref ref-type="bibr" rid="B45">Shirasawa et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B2">Bae et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B59">Yu et&#xa0;al., 2024</xref>). Though effective in reducing contamination, these computational approaches have limitations. 
They ignore potential HGT of organelle sequences into the nuclear genome (<xref ref-type="bibr" rid="B5">Cecchin et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B1">Allio et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B18">Kenny et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B31">Martin et&#xa0;al., 2023</xref>). Due to such transfer, the fragments of organelle genomes are inserted into the nuclear genomes; therefore, traditional sequence similarity-based methods can hardly distinguish organelle genomes from the nuclear HGT regions. Additionally, current methods often require pre-assembled reference organelle genomes, limiting their applicability in the species without well-established organelle references. Moreover, the implementation of current methods generally lacks support from integrated and user-friendly software, requiring users to manually perform all steps. The data processing is still time-consuming and prone to errors, particularly when dealing with large numbers of data sets.</p>
<p>To address the issue of identifying organelle genome sequences from genomic assembly accurately, we established a novel method, which employed two key filtering criteria: the alignment length coverage ratio (ALCR) and sequencing depth ratio (SDR). The ALCR refers to the proportion of a contig&#x2019;s total length that is aligned with the organelle reference genome relative to the total length of the contig. This criterion can differentiate contigs that contain only small pieces of organelle DNA, which more likely arise from HGT, as these fragments usually constitute only a small portion of the contig. Therefore, a low ALCR may indicate that the contig belongs to nuclear genome rather than organelle genome (<xref ref-type="bibr" rid="B65">Zhu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B34">Nath et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B13">Hao et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B64">Zhou et&#xa0;al., 2023b</xref>). Meanwhile, the SDR refers to the ratio of each contig&#x2019;s sequencing depth to the average sequencing depth of the organelle genome. Given that organelle genomes exist in many copies within a cell, they typically exhibit higher sequencing depths than nuclear genomes (<xref ref-type="bibr" rid="B43">Sanita Lima et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B56">Wang et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B26">Li et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B10">Giorgashvili et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B63">Zhou et&#xa0;al., 2023a</xref>). Therefore, a contig with a high sequencing depth ratio, similar to the average of the organelle genome, is more likely to be a part of the organelle genome. By combining these two metrics, we can significantly improve the accuracy of identifying and removing organelle genome sequences from genome assembly data.</p>
<p>Furthermore, to facilitate usage by researchers with limited bioinformatics experience, we have implemented this new approach as easy-to-use software and packaged it as a Docker image, enabling easy distribution and execution across diverse computing platforms with a single command. We validated the accuracy and reliability of our tool using sequencing data from Plum (<italic>Prunus salicina</italic>) (<xref ref-type="bibr" rid="B27">Liu et&#xa0;al., 2020</xref>), Mango (<italic>Mangifera indica</italic>) (<xref ref-type="bibr" rid="B55">Wang et&#xa0;al., 2020</xref>) and <italic>Arabidopsis</italic> (<italic>Arabidopsis thaliana</italic>) (<xref ref-type="bibr" rid="B53">Wang et&#xa0;al., 2022</xref>). Our software can not only accurately identify organelle genome contigs from genome assembly, but also accurately distinguish native organelle sequences from those inserted into the nuclear genome via HGT. Our tool will offer an accurate and effective solution for eliminating organelle DNA fragments from genome assembly contigs, hold significant merit in improving chromosome assembly, and deepen our understanding of the complex interactions between organelle and nuclear genomes.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Availability of data and materials</title>
<p>To validate the accuracy of the Chlomito software in detecting organelle genome sequences, we utilized sequencing data of Mango and Plum from the NCBI Bioproject database, with accession numbers PRJNA487154 and PRJNA574159, respectively. The raw sequencing data for the PacBio HiFi reads and Illumina short reads of <italic>Arabidopsis</italic> were obtained from the National Genomics Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences/China National Center for Bioinformation (GSA: CRA004538). These datasets include high-quality second and third-generation sequencing data, which were utilized for organelle genome identification and chromosome-level genome assembly.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Installation and implementation of Chlomito</title>
<p>Chlomito is Python (v3.8.5)-based software provided in the form of a Docker image. The image is accessible at <ext-link ext-link-type="uri" xlink:href="https://hub.docker.com/repository/docker/songweidocker/chlomito">https://hub.docker.com/repository/docker/songweidocker/chlomito</ext-link>. All analyses were conducted on an Ubuntu Linux 18.04.3 server, equipped with two Intel Xeon processors (32 cores each, totaling 64 threads) and 512 GB of RAM. The user manual for Chlomito is available on GitHub (<ext-link ext-link-type="uri" xlink:href="https://github.com/songwei-hxb/chlomito">https://github.com/songwei-hxb/chlomito</ext-link>).</p>
<p>Chlomito can be installed using the Docker v19.03.5 command:</p>
<p>docker pull songweidocker/chlomito:v1</p>
<p>The command for running Chlomito organelle genome identification and removal is as follows:</p>
<p>docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v `pwd`:/data -w /data songweidocker/chlomito:v1 chlomito -species plant -raw_genome genome_contigs.fasta -NGS_1 ngs_1.fastq -NGS_2 ngs_2.fastq -output identify_result -mito_ALCR_cutoff 0.5 -mito_SDR_cutoff 0.1 -chlo_ALCR_cutoff 0.5 -chlo_SDR_cutoff 0.1 -threads 60</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Contig-level genome assembly</title>
<p>Flye v2.9 (<xref ref-type="bibr" rid="B20">Kolmogorov et&#xa0;al., 2019</xref>) is genome assembler software designed for long-read sequencing data from third-generation platforms such as PacBio and Oxford Nanopore. It is capable of assembling raw error-prone long reads into contiguous genomic sequences known as contigs. The goal of Flye is to generate high-quality genome assembly, especially for large or complex genomes. In this study, we utilized Flye to assemble PacBio sequencing data of Mango and Plum into genome assembly contigs. Due to the high error rate of TGS data in PacBio CLR reads and Nanopore reads, the assembled contigs were then corrected using Racon v1.3.1 (<xref ref-type="bibr" rid="B50">Vaser et&#xa0;al., 2017</xref>) and Pilon v1.22 (<xref ref-type="bibr" rid="B52">Walker et&#xa0;al., 2014</xref>).</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Construction of organelle genome database</title>
<p>The mitochondria and chloroplast organelle genomes were firstly assembled from SGS data using GetOrganelle v1.7.1 (<xref ref-type="bibr" rid="B17">Jin et&#xa0;al., 2020</xref>). GetOrganelle is a powerful genomics software tool specifically designed for efficient assembly of mitochondrial and chloroplast genomes. It is capable of simultaneously assembling organelle genomes from both mitochondria and chloroplast. Compared to other similar software tools, GetOrganelle demonstrates superior performance in terms of both accuracy and speed for organelle genome assembly. After that, we merged the mitochondrial and chloroplast genomes published in the NCBI organelle database with organelle genomes assembled using GetOrganelle, and created a comprehensive local organelle genome database. Since the local database integrated existing public data resources with high-precision assembly outcomes, it could offer more comprehensive and accurate reference for organelle genomes.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>The annotation of chloroplast and mitochondrial genomes</title>
<p>The chloroplast and mitochondria genome sequences were annotated with GeSeq (<xref ref-type="bibr" rid="B48">Tillich et&#xa0;al., 2017</xref>) and OGDRAW (<xref ref-type="bibr" rid="B28">Lohse et&#xa0;al., 2013</xref>). GeSeq pipeline analysis was performed using the annotation packages ARAGORN (<xref ref-type="bibr" rid="B23">Laslett and Canback, 2004</xref>), blatN (<xref ref-type="bibr" rid="B19">Kent, 2002</xref>), Chloe (<xref ref-type="bibr" rid="B62">Zhong, 2020</xref>) and HMMER (<xref ref-type="bibr" rid="B8">Eddy, 2011</xref>). GeSeq is a user-friendly online service specifically designed for the annotation of mitochondrial and chloroplast genomes. This platform enables researchers to upload unannotated DNA sequences and utilizes its database of existing high-quality annotations to identify and label genes, coding sequences, and other significant genomic features.</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Calculation of alignment length coverage ratio</title>
<p>Following the construction of local organelle genome database, all contigs assembled by the Flye v2.9 software were aligned against this database using Minimap2 v2.17 (<xref ref-type="bibr" rid="B24">Li, 2018</xref>). Subsequent to the alignment process, Alignment Length Coverage Ratio (ALCR) was calculated for each contig. The core filtering criterion ALCR is defined as the ratio of the aligned length sum from a contig to its total length, which can be formulated as:</p>
<disp-formula>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mtable>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mtext>ALCR</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mo>=</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mtext>aligned</mml:mtext>
<mml:mo>_</mml:mo>
<mml:mtext>length</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">/</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>total</mml:mtext>
<mml:mo>_</mml:mo>
<mml:mtext>length</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In the formula, ALCR(contig) is the ALCR value for a given contig, &#x3a3;aligned_length(contig, ref) is the sum of lengths of all aligned regions (1 to n) between the contig and the reference organelle genome, and total_length(contig) is the total length of the contig. A higher ALCR value indicates greater similarity between the contig and the reference organelle genome, and thus a higher chance that the contig is from the organelle genome. Unlike previously reported methods, the calculation of ALCR does not solely rely on the single longest alignment region. Instead, it aggregates the lengths of all contig regions that align with the organelle genome reference. This approach offers a more comprehensive reflection of the alignment coverage between the contig and the organelle genomes. Finally, by comparing the aggregated alignment length of each contig against its total length, the alignment length coverage ratio for each contig is computed.</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Calculation of sequencing depth ratio</title>
<p>The sequencing depth ratio (SDR) refers to the ratio between the sequencing depth of each contig and the average sequencing depth of the organelle genome, which can be formulated as:</p>
<disp-formula>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mtext>SDR</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mtext>depth</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">/</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>avg</mml:mtext>
<mml:mo>_</mml:mo>
<mml:mtext>depth</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>_</mml:mo>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In the formula, SDR(contig) is the SDR value for a given contig, depth(contig) is the average SGS depth of the contig, and avg_depth(organelle_genome) is the average SGS depth of the organelle genome assembled by GetOrganelle. The average sequencing depth of the organelle genome is determined by aligning the SGS reads to the organelle genome assembled by GetOrganelle v1.7.1 using Bowtie2 v2.4.2 (<xref ref-type="bibr" rid="B22">Langmead and Salzberg, 2012</xref>), which generates a SAM file. This SAM file is then processed by Samtools v1.6 (<xref ref-type="bibr" rid="B25">Li et&#xa0;al., 2009</xref>) to produce a sorted BAM file with depth information. Finally, Bedtools v2.30.0 (<xref ref-type="bibr" rid="B38">Quinlan and Hall, 2010</xref>) is utilized to analyze this depth data and calculate the average sequencing depth across the organelle genome. The method for calculating the sequencing depth of each contig is identical to that used for the organelle genome. Upon completion of these calculations, the sequencing depth for each contig is divided by the average sequencing depth of the organelle genome to obtain SDR for each contig.</p>
</sec>
<sec id="s2_8">
<label>2.8</label>
<title>Identification of organelle sequences</title>
<p>After calculating the ALCR and SDR values for each contig using the locally constructed organelle genome database and SGS data, contigs belonging to the organelle genome are identified from genome assembly contigs based on ALCR and SDR filtering thresholds inputted by the user. Here, we used thresholds ALCR&gt;0.5 and SDR&gt;0.1 at the first round of filtering. The filtering thresholds can be further optimized according to the ALCR and SDR visualization scatter plot generated after running Chlomito. By utilizing adjusted filtering thresholds, the genome assembly contigs can be filtered and selected again, resulting in more precise outcomes.</p>
</sec>
<sec id="s2_9">
<label>2.9</label>
<title>Chromosomal-level genome assembly</title>
<p>The genome sizes of various species were calculated using jellyfish v2.2.10 (<xref ref-type="bibr" rid="B30">Marcais and Kingsford, 2011</xref>) and GenomeScope v2.0 (<xref ref-type="bibr" rid="B51">Vurture et&#xa0;al., 2017</xref>) with SGS data and input into Flye for contig-level genome assembly with TGS data. After contig-level genome assembly, contig sequences were corrected with SGS reads using racon v1.3.1 and pilon v1.22, and redundancy was reduced using purge_dups v1.2.5 (<xref ref-type="bibr" rid="B12">Guan et&#xa0;al., 2020</xref>). Hi-C sequencing data were aligned to the deduplicated contigs using HiC-Pro v3.1.0 (<xref ref-type="bibr" rid="B44">Servant et&#xa0;al., 2015</xref>), and finally, Allhic v0.9.8 (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>) was used to cluster, order, and orient the contigs based on Hi-C alignment results, achieving the final chromosomal-level genome assembly. Gaps or missing regions may be present in genome assembly due to the limitations of sequencing technologies. To obtain more complete and accurate genome sequences, we applied two approaches - Abyss Sealer v2.0.2 (<xref ref-type="bibr" rid="B15">Jackman et&#xa0;al., 2017</xref>) and TGS-GapCloser v1.1.1 (<xref ref-type="bibr" rid="B58">Xu et&#xa0;al., 2020</xref>) - for closing gaps in our chromosome-level genome assembly. TGS-GapCloser v1.1.1 utilizes long reads from TGS platforms to fill gaps between contigs and extend contig ends based on overlaps between contigs and long reads. Abyss Sealer v2.0.2 is a computational tool that seals gaps in genome assembly by aligning Illumina short reads to contig ends and performing local assembly to generate consensus sequences for gap regions. The methods applied in this study for chromosomal-level genome assembly are based on the previous study (<xref ref-type="bibr" rid="B47">Song et&#xa0;al., 2024</xref>).</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>The development of Chlomito software</title>
<p>Chlomito has two main functions: assembling organelle genomes from NGS reads and screening contigs originating from organelle genomes. These functions are achieved through a workflow comprising three parts: the construction of a local organelle genome database, organelle genome contig identification based on ALCR, and organelle genome contig identification based on SDR (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Workflow of the Chlomito software package.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1430443-g001.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>The construction of local organelle genome database</title>
<p>At the first step, Chlomito constructs a local organelle genome database by combining two approaches (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, step 1). The initial approach employs GetOrganelle to assemble mitochondrial and chloroplast genomes with SGS data. This is particularly valuable for the species without publicly-available organelle genome references, although the short-read assembly is sometimes incomplete. To further complement the database, the second approach downloads published high-quality organelle genomes from NCBI organelle database. With these approaches, the constructed local database is able to avoid the limitations from relying on a single data source, offering a broad and reliable organelle genome reference for downstream analysis.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Organelle genome identification based on ALCR</title>
<p>At the second step of Chlomito, organelle genome contigs are identified based on alignment length coverage ratio (ALCR). Chlomito preliminarily screens for potential organelle genomic sequences by aligning TGS-assembled genome contigs to the local database and filtering based on ALCR of the alignment (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, step 2). The core filtering criterion ALCR is defined as the ratio of the aligned length sum from a contig to its total length. Traditional methods typically consider only the longest length aligned between a contig and the organelle reference genome. In contrast, the alignment length of a contig in our approach is the sum of all aligned region lengths. This provides a more comprehensive assessment for the similarity between a contig and the organelle genome, improving the accuracy and sensitivity of organelle genome contig identification. In addition, compared to traditional methods, ALCR can also effectively distinguish organelle genome sequences from those inserted into the nuclear genome via HGT, as HGT insertions tend to be smaller, holding a lower ALCR value.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Organelle genome identification based on SDR</title>
<p>At the third step, Chlomito employs sequencing depth ratio (SDR) to further validate the organelle genome contigs previously filtered by the ALCR criteria (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, step 3). SDR refers to the ratio of the average sequencing depth of a contig to the average sequencing depth of the organelle genome. Given that the copy number of organelle genome is significantly higher in each cell compared to the nuclear genome, the sequencing depth ratio can be utilized to further distinguish organelle genomes from nuclear genomes. Considering the variable copy numbers of organelle genomes across various tissues and developmental stages (<xref ref-type="bibr" rid="B36">Preuten et&#xa0;al., 2010</xref>), it is difficult to accurately estimate the precise ratio of organelle to nuclear genome. Therefore, instead of using nuclear genome sequencing depth as a reference (<xref ref-type="bibr" rid="B56">Wang et&#xa0;al., 2018</xref>), the SDR approach adopts a method of comparing the sequencing depth of each contig against the average sequencing depth of organelle genomes to more accurately identify contigs derived from organelle genomes.</p>
<p>In summary, by utilizing both ALCR and SDR filtering methods, Chlomito can accurately identify organelle genome contigs from the total contigs. Furthermore, it can effectively reduce the misidentification of nuclear genome contigs as organelle genomes caused by HGT of organelle genomes.</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>The investigation into mitochondrial and chloroplast genomes in the NCBI database</title>
<p>To gain comprehensive understanding about the characteristics of mitochondrial and chloroplast genomes across a wide range of organisms, we explored genome sizes, gene numbers, and other features for mitochondrial and chloroplast genomes listed in the NCBI organelle database (<ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/genome/browse#!/organelles/">https://www.ncbi.nlm.nih.gov/genome/browse#!/organelles/</ext-link>). The results revealed that the database contains 152 mitochondrial genomes (mitogenomes) and 263 chloroplast genomes (chlorogenomes) derived from plants, with approximately 50% to 60% of these genomes being annotated. In comparison, the numbers of animal and fungal mitogenomes were significantly higher, with 1,568 and 692 genomes respectively. However, the annotation rates for animal and fungal mitogenomes were lower, standing at only 30% (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>). Further inspection showed that most mitogenomes in the database were from insects in animal and ascomycetes in fungi, while chlorogenomes were predominantly from land plants and green algae (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>).</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Overview of mitochondrial and chloroplast genome data in the NCBI organelle genome database. <bold>(A)</bold> Counts of the total and annotated mitochondrial and chloroplast genomes across typical kingdoms. <bold>(B)</bold> The numbers of mitochondrial and chloroplast genomes assembled for various taxonomic groups. <bold>(C)</bold> The length distribution of mitochondrial and chloroplast genomes for plants, animals and fungi. <bold>(D)</bold> The numbers of genes contained in mitochondrial and chloroplast genomes across different species.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1430443-g002.tif"/>
</fig>
<p>Plant chlorogenomes exhibited relative stability in terms of genome lengths and gene numbers, averaging around 0.15 Mb in size and containing ~82 genes on average (<xref ref-type="fig" rid="f2">
<bold>Figures&#xa0;2C, D</bold>
</xref>). In contrast, plant mitogenomes displayed greater variability in both lengths and gene numbers as previously reported (<xref ref-type="bibr" rid="B4">Bendich, 2010</xref>; <xref ref-type="bibr" rid="B35">Oldenburg and Bendich, 2015</xref>), suggesting the potential involvement of more complex evolutionary processes (<xref ref-type="bibr" rid="B21">Kubo and Newton, 2008</xref>). In terms of animal mitogenomes, we found a high degree of conservation, with an average size of 0.017 Mb and typically including 13 genes. Fungal mitogenomes, on the other hand, had an average size of 0.063 Mb and contained an average of 14 genes (<xref ref-type="fig" rid="f2">
<bold>Figures&#xa0;2C, D</bold>
</xref>). These analyses thoroughly characterized the features of organelle genome sizes and annotated status, as well as gene numbers, across different kingdoms such as Plantae, Animalia, and fungi, providing crucial support for effectively identifying and removing organelle genome segments from genomic assembly sequences in future research.</p>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>The performance of Chlomito on the detection of chloroplast genomes</title>
<p>To evaluate the performance of Chlomito in identifying chlorogenomes, we tested it with sequencing data derived from Plum (<italic>Prunus salicina</italic>) and Mango (<italic>Mangifera indica</italic>). Prior to detecting chloroplast genomic sequences from the contigs assembled from TGS reads, we first assembled the chlorogenomes of Mango and Plum from their NGS reads using GetOrganelle, and the assembly for each species yielded a single sequence representing the complete chlorogenome. Collinearity analysis revealed high consistency between the assembled chlorogenomes and the published reference genomes in both Mango and Plum (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplementary Figure S1</bold>
</xref>). This demonstrated the accuracy and reliability of the GetOrganelle assembly for downstream analysis. We then annotated the chlorogenomes of Mango and Plum using GeSeq and found that they contained similar numbers of genes with highly similar arrangements (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>). To investigate the structural conservation of chlorogenomes across diverse plant species, we compared the chlorogenomes of Mango and Plum with those of other plant species including <italic>Arabidopsis thaliana</italic> and <italic>Zea mays</italic>. The results showed that chlorogenomes were highly conserved in gene contents and orders across diverse plant species analyzed here (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplementary Figure S2</bold>
</xref>), implying that chlorogenomes may be structurally conserved across diverse plants.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Chloroplast genome annotation of Mango <bold>(A)</bold> and Plum <bold>(B)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1430443-g003.tif"/>
</fig>
<p>After assembling the chlorogenomes of Mango and Plum using GetOrganelle, we integrated these sequences with the NCBI chlorogenome database to create a localized chloroplast database. Then, the TGS-assembled genome contigs were aligned to the local organelle genome database. We next employed two key metrics to identify chloroplast-derived contigs among the total TGS-assembled contigs. The first metric is Alignment Length Coverage Ratio (ALCR), which is the ratio of the summed aligned length of each contig to the total length of that contig. The second metric is Sequencing Depth Ratio (SDR), which is the ratio of the sequencing depth of each contig to the average sequencing depth of the assembled chlorogenome. Based on default parameters ALCR&gt;0.5 and SDR&gt;0.1, we identified 3 and 5 potential chloroplast-derived contigs from Mango and Plum samples respectively (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4A, D</bold>
</xref>). These contigs showed similar alignment lengths in the GetOrganelle-assembled and the NCBI database chlorogenomes (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4B, E</bold>
</xref>), further validating the reliability of the chloroplast genomic contigs detected by Chlomito. Collinearity analysis displayed excellent consistency between these identified contigs and the chloroplast reference genomes, and two inverted repeat regions of the chlorogenomes (IRA and IRB) were also clearly observed in the co-linearity analysis (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4C, F</bold>
</xref>). These results further confirmed that these contigs were indeed derived from the chlorogenomes and were completely detected.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Chlomito accurately identifies chloroplast-derived contigs and validates their collinearity with chloroplast reference genomes from Mango <bold>(A&#x2013;C)</bold> and Plum <bold>(D&#x2013;F)</bold>. <bold>(A, D)</bold> Identification of chloroplast-derived contigs in Mango <bold>(A)</bold> and Plum <bold>(D)</bold> based on ALCR and SDR metrics. <bold>(B, E)</bold> Alignment lengths of Mango <bold>(B)</bold> and Plum <bold>(E)</bold> contigs with chloroplast genomes assembled using GetOrganelle and downloaded from NCBI database. <bold>(C, F)</bold> Collinearity analysis of Mango <bold>(C)</bold> and Plum <bold>(F)</bold> contigs identified by Chlomito against published chloroplast reference genomes.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1430443-g004.tif"/>
</fig>
<p>In addition to the chloroplast-derived contigs, we also observed some contigs with low ALCR (&lt;0.5) and high SDR (&gt;0.1) in both Mango (11 contigs) and Plum (12 contigs) genomes. In Mango, 8 out of the 11 contigs with low ALCR and high SDR were confirmed to be of mitochondrial origin. These mitogenome contigs were detected during our chlorogenome contamination analysis, likely due to the occurrence of HGT between mitochondrial and chlorogenomes. Such gene transfer events can result in contigs with low ALCR and high SDR. In Plum, 5 of the 12 contigs were similarly identified as mitochondrial derivation. The remaining 7 contigs in Plum were subjected to further analysis using RepeatMasker, which revealed that the majority of these contigs contained repetitive sequences that constituted more than 50% of their lengths (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S1</bold>
</xref>). This high proportion of repetitive sequences likely contributed to the unusually high sequencing depths observed in these contigs.</p>
<p>Based on the alignment results of all contigs from Mango and Plum against the local chloroplast genome database, we identified 226 Mango contigs and 174 Plum contigs that aligned with the database sequences at lengths greater than 5000 bp. However, the ratio of each contig&#x2019;s length that aligned to the database (ALCR) was low, with most of them having less than 10% coverage (<xref ref-type="supplementary-material" rid="SF3">
<bold>Supplementary Figure S3</bold>
</xref>). The chloroplast genomic fragments present in these ultra-long contigs may have been inserted into the nuclear chromosomes through HGT from the chloroplast genome. These results indicated that the traditional method of filtering out chlorogenomes based solely on alignment lengths might erroneously identify some ultra-long nuclear contigs that contain only a small proportion of chloroplast genomic content. Therefore, adopting more refined filtering criteria, such as ALCR and SDR, is critical to accurately differentiate contigs that are truly from the organelle genomes versus those inserted into nuclear genomes through HGT.</p>
</sec>
<sec id="s3_7">
<label>3.7</label>
<title>The performance of Chlomito on the detection of mitogenomes</title>
<p>Following the validation of Chlomito&#x2019;s efficacy in chlorogenome identification, we expanded our investigation to assess its performance in detecting mitogenomes. Mitogenomes of Mango and Plum were first assembled from NGS data using GetOrganelle. Unlike a single sequence of complete chlorogenome assembled by Chlomito above (<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplementary Figure S1</bold>
</xref>), the constructed mitogenomes of Mango and Plum were composed of multiple fragments. The assembled Mango mitogenome consisted of 15 sequences totaling 0.48 Mb, while Plum mitogenome was composed of 11 sequences totaling 0.36 Mb (<xref ref-type="supplementary-material" rid="SF4">
<bold>Supplementary Figure S4</bold>
</xref>). To validate the accuracy of the mitogenome assembly, we performed collinearity analysis between the assembled and the NCBI reference mitogenomes for Mango (MZ751075.1) and Plum (OK563724.1). The results showed that the assembled mitogenomes had high collinearity with the reference and covered most regions of the reference genomes (<xref ref-type="supplementary-material" rid="SF4">
<bold>Supplementary Figure S4</bold>
</xref>), indicating the high accuracy and completeness of the GetOrganelle-assembled mitogenomes. To evaluate the structural conservation of mitogenomes across different plant species, we annotated and compared the mitogenomes of Mango, Plum, <italic>Zea mays</italic>, and <italic>Arabidopsis thaliana</italic> with the OGDRAW website. The results showed that unlike chlorogenomes, the mitogenomes from these species did not show conserved gene contents or gene orders (<xref ref-type="supplementary-material" rid="SF5">
<bold>Supplementary Figure S5</bold>
</xref>). Subsequently, we integrated the assembled mitogenomes with those downloaded from NCBI database to establish a local database.</p>
<p>After constructing local mitogenome databases for Mango and Plum, we mapped SGS data to the mitogenomes in the local databases as well as to the TGS-assembled contigs, then calculated ALCR and SDR for each contig. Based on the parameters ALCR and SDR, we identified 11 and 10 contigs likely originating from the mitogenomes in Mango and Plum respectively (<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5A, D</bold>
</xref>). These contigs also exhibited similar alignment lengths with the mitogenomes assembled by GetOrganelle and the NCBI database (<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5B, E</bold>
</xref>). Moreover, collinearity analysis revealed that the identified contigs and the mitochondrial reference genomes had a high consistency in Mango and Plum, with the majority of the mitogenome regions being covered by these contigs (<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5C, F</bold>
</xref>). Similar to the results in chloroplast, we also detected 116 and 52 contigs with alignment lengths exceeding 5000 bp but exhibiting low coverage (less than 10%) in Plum and Mango (<xref ref-type="supplementary-material" rid="SF6">
<bold>Supplementary Figure S6</bold>
</xref>). These results indicated that HGT of large fragments to the nucleus happened in both mitochondrial and chloroplast genomes.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Chlomito accurately identifies mitochondrial-derived contigs and validates their collinearity with mitochondrial reference genomes from Mango <bold>(A&#x2013;C)</bold> and Plum <bold>(D&#x2013;F)</bold>. <bold>(A, D)</bold> Identification of mitochondria-derived contigs in Mango <bold>(A)</bold> and Plum <bold>(D)</bold> using ALCR and SDR metrics. <bold>(B, E)</bold> Alignment lengths of contigs with GetOrganelle-assembled and NCBI-downloaded mitogenomes for Mango <bold>(B)</bold> and Plum <bold>(E)</bold>. <bold>(C, F)</bold> Collinearity analysis of Chlomito-identified contigs against published mitochondrial reference genomes for Mango <bold>(C)</bold> and Plum <bold>(F)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1430443-g005.tif"/>
</fig>
</sec>
<sec id="s3_8">
<label>3.8</label>
<title>The performance of Chlomito on the detection of HGT in <italic>Arabidopsis</italic>
</title>
<p>To assess the accuracy of Chlomito in detecting HGT events, we utilized an experimentally confirmed large nuclear insertion of mitochondrial DNA (numt) in <italic>Arabidopsis</italic> (<xref ref-type="bibr" rid="B9">Fields et&#xa0;al., 2022</xref>) as a test case, representing an HGT event from mitochondria to nuclei. This numt, located on chromosome 2 of <italic>Arabidopsis thaliana</italic>, spans approximately 641 kb and is one of the largest numts reported in plants to date. Its existence has been validated by fiber-based fluorescent <italic>in situ</italic> hybridization.</p>
<p>By applying ALCR and SDR as screening criteria, we identified a total of 22 TGS-assembled contigs that potentially belong to the <italic>Arabidopsis</italic> mitogenome (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6A</bold>
</xref>). Collinearity analysis of these fragments with the NCBI mitogenome reference revealed that these fragments exhibited high collinearity with the reference and covered most of the mitogenome regions (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6B</bold>
</xref>). These results again confirmed that Chlomito could effectively identify mitogenome contigs in <italic>Arabidopsis</italic>. Interestingly, we noticed one contig, ptg000002l, which did not pass the mtDNA screening criteria of ALCR and SDR, meaning that it is not a mitogenome contig (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6A</bold>
</xref>). The alignment of ptg000002l with the local database showed a high coverage rate but a low depth ratio, suggesting that it might be an HGT fragment. Surprisingly, comparison of ptg000002l with the previously validated large numt on chromosome 2 of <italic>Arabidopsis</italic> (<xref ref-type="bibr" rid="B9">Fields et&#xa0;al., 2022</xref>) revealed a perfect match between them (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6C</bold>
</xref>). Furthermore, we specifically aligned PacBio HiFi reads (<xref ref-type="bibr" rid="B53">Wang et&#xa0;al., 2022</xref>) to the junctions between the identified numt and its flanking regions on ptg000002l. We found that some HiFi reads span these junctions, which further substantiates that this numt was horizontally transferred into chromosome 2, rather than being the result of assembly errors (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6D</bold>
</xref>).</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Chlomito accurately identifies mitochondria-derived contigs and a horizontally transferred organelle fragment (numt) from <italic>Arabidopsis</italic>. <bold>(A)</bold> Identification of mitochondria-derived contigs in <italic>Arabidopsis</italic> using ALCR and SDR as screening criteria. <bold>(B)</bold> Collinearity analysis of Chlomito-identified contigs against published mitochondrial reference genomes for <italic>Arabidopsis</italic>. <bold>(C)</bold> Comparison of the ptg000002l fragment with the previously reported large numt on chromosome 2 of <italic>Arabidopsis</italic>. <bold>(D)</bold> Alignment of PacBio HiFi sequences (<xref ref-type="bibr" rid="B53">Wang et&#xa0;al., 2022</xref>) with the numt and its flanking regions.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1430443-g006.tif"/>
</fig>
<p>Similar to its performance in mitochondrial genome sequence detection, Chlomito accurately distinguished <italic>Arabidopsis</italic> chloroplast fragments from genomic contigs, as shown in <xref ref-type="supplementary-material" rid="SF7">
<bold>Supplementary Figure S7A</bold>
</xref>. The chloroplast genome fragments detected by Chlomito had similar alignment lengths with the reference genome assembled by GetOrganelle and the NCBI chloroplast reference genome (<xref ref-type="supplementary-material" rid="SF7">
<bold>Supplementary Figure S7B</bold>
</xref>). Furthermore, these detected chloroplast genome fragments showed good collinearity with the chloroplast reference genome and covered most of its regions (<xref ref-type="supplementary-material" rid="SF7">
<bold>Supplementary Figure S7C</bold>
</xref>). This further demonstrates Chlomito&#x2019;s utility in accurately identifying organelle genome contigs within complex genomic datasets.</p>
<p>In summary, the Chlomito tool accurately identified the contigs of the <italic>Arabidopsis</italic> mitogenome and chlorogenome, and also effectively distinguished genuine mitogenome fragments from numts, i.e., mitochondrial sequences that have been transferred into the nuclear genome via HGT. These results validated our method as a reliable tool for understanding complex genomic evolution.</p>
</sec>
<sec id="s3_9">
<label>3.9</label>
<title>Organelle genome contamination in chromosome assembly</title>
<p>To evaluate the impact of organelle genome contamination on the accuracy of chromosome assembly, we aligned the Plum chromosomes assembled from all contigs without removing organelle sequences to the contigs identified as organelle DNA by Chlomito. The aligned result indicated that two mitogenome fragments (contig_2 and contig_4851) identified by Chlomito were erroneously assembled into chromosome 1 of Plum (<xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7A</bold>
</xref>). The full fragments of contig_2 (13,217 bp) and contig_4851 (6,187 bp) showed perfect matches with mitochondrial reference genomes NC_065233.1 and OK563724.1, which are complete mitogenomes of Plum listed in the NCBI organelle genome database. Further analysis demonstrated that the sequencing depths of these two contigs were much higher than the average depth of the chromosomal genome, and were close to the average depth of the mitogenome (<xref ref-type="fig" rid="f7">
<bold>Figures&#xa0;7B, C</bold>
</xref>). This is consistent with the characteristic that organelle genomes have higher copy numbers than nuclear genomes. In addition, we aligned PacBio sequencing reads to these two contigs and their flanking regions, and found that no PacBio reads could be mapped to their junction and flanking regions, further confirming that these two contigs were the result of chromosome assembly errors rather than true nuclear insertion of mitochondrial DNA (numt).</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Identification and verification of organelle genome contamination in the assembled chromosome 1 of Plum. <bold>(A)</bold> The alignment between two mitochondria-derived contigs (identified by Chlomito) and the assembled chromosome 1 of Plum. <bold>(B, C)</bold> The sequencing depths are shown for contig_2 <bold>(B)</bold> and contig_4851 <bold>(C)</bold>, in parallel with the average depths of chromosome and mitochondrial genomes.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1430443-g007.tif"/>
</fig>
<p>Altogether, these results highlight that unfiltered organelle sequences can truly contaminate the nuclear genome during chromosome-level genome assembly. Therefore, the prior identification and exclusion of organelle genome sequences using Chlomito is crucial for ensuring the accuracy and integrity of chromosome assembly.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In this study, we have developed a novel tool called Chlomito that provides an innovative approach for accurately identifying organelle genome sequences from complex genome assemblies. This method significantly improves the accuracy of recognizing organelle genomic fragments by jointly applying two metrics. (1) The first metric is Alignment Length Coverage Ratio (ALCR). Different from previous conventional methods (<xref ref-type="bibr" rid="B34">Nath et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B3">Bao et&#xa0;al., 2024</xref>), the calculation of ALCR does not solely rely on the single longest alignment region. Instead, it adds the lengths of all regions on the contig that are aligned with the organelle genome database. This metric offers a more accurate and comprehensive reflection of the alignment coverage between the contig and the organelle genome. The introduction of ALCR can significantly reduce the likelihood of incorrectly identifying nuclear genomic sequences as organelle genome sequences, especially for those organelle genome sequences that have been inserted into nuclear genomes via horizontal gene transfer (HGT). (2) The second metric is Sequencing Depth Ratio (SDR). Considering the varying copy numbers of organelle genomes in different tissues and developmental stages of plants (<xref ref-type="bibr" rid="B36">Preuten et&#xa0;al., 2010</xref>), the ratio of sequencing depths between organelle genome and nuclear genome is not constant (<xref ref-type="bibr" rid="B56">Wang et&#xa0;al., 2018</xref>). Consequently, in this study, we compared the sequencing depths of contigs against the average sequencing depth of organelle genome to enhance the precision of detection outcomes. The application of SDR provides an additional robust filtering dimension, further ensuring the identification of sequences truly belonging to organelle genomes among all the assembled contigs.</p>
<p>Moreover, we have noted that recently, some new methods such as ODNA (<xref ref-type="bibr" rid="B31">Martin et&#xa0;al., 2023</xref>) and Odintifier (<xref ref-type="bibr" rid="B41">Samaniego Castruita et&#xa0;al., 2015</xref>) can also be used for the identification of organelle genomes. ODNA takes a machine learning approach, extracting features from the sequences and training classification models to distinguish organelle and nuclear genome sequences. Odintifier utilizes phasing technology to separate reads containing SNVs into organelle and nuclear genome reads, thereby enabling the identification of organelle genome sequences. Compared to them, Chlomito not only considers sequence alignment information but also takes full advantage of the differences in copy numbers and sequencing depths between organelle and nuclear genomes, thereby enhancing the identification accuracy. The organelle genome contigs identified by Chlomito exhibit high collinearity with its reference and cover most of the reference genome regions (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4C, F</bold>
</xref>, <xref ref-type="fig" rid="f5">
<bold>5C, F</bold>
</xref>, <xref ref-type="fig" rid="f6">
<bold>6B</bold>
</xref>; <xref ref-type="supplementary-material" rid="SF7">
<bold>Supplementary Figure S7</bold>
</xref>), as we tested it using sequencing data from multiple species, including Mango (<italic>Mangifera indica</italic>) (<xref ref-type="bibr" rid="B55">Wang et&#xa0;al., 2020</xref>), Plum (<italic>Prunus salicina</italic>) (<xref ref-type="bibr" rid="B27">Liu et&#xa0;al., 2020</xref>), and <italic>Arabidopsis</italic> (<italic>Arabidopsis thaliana</italic>) (<xref ref-type="bibr" rid="B53">Wang et&#xa0;al., 2022</xref>). Thus, Chlomito is capable of accurately detecting the organelle genome contaminants scattered in the assembly results. Also, it can effectively pick out the entire organelle genome sequences (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4C, F</bold>
</xref>). In short, Chlomito is a reliable tool in organelle genomic studies, and in supporting precise nuclear genome assembly by removing organelle genome contaminants.</p>
<p>In addition to the contigs identified by Chlomito as belonging to organelle genomes, there were also numerous contigs with alignment lengths to the reference organelle genomes exceeding 5 kb but exhibiting low alignment coverage (ALCR less than 10%) relative to the contig lengths (<xref ref-type="supplementary-material" rid="SF6">
<bold>Supplementary Figure S6</bold>
</xref>). This phenomenon may be attributable to the intracellular insertion of organelle genomes into the nuclear genome via HGT events. HGT is a significant mechanism in the evolutionary process, particularly in the exchange of genetic information between organelle genomes and host nuclear genomes. Similar studies have also observed the phenomenon of large organelle genome fragments over 4 kb being transferred into nuclear genomes via HGT in watermelon and melon (<xref ref-type="bibr" rid="B6">Cui et&#xa0;al., 2021</xref>). These findings suggest that large-scale HGT may be a widespread occurrence across diverse species. In this context, it is particularly important to accurately distinguish sequence exchanges between organelle and nuclear genomes caused by HGT. Chlomito is designed to address this challenge by employing two powerful metrics &#x2014; ALCR and SDR. Two-dimensional grouping and filtering in Chlomito with ALCR and SDR can clearly separate different groups: the organelle genome contigs and the nuclear insertions of organelle sequences (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4A, D</bold>
</xref>, <xref ref-type="fig" rid="f5">
<bold>5A, D</bold>
</xref>, <xref ref-type="fig" rid="f6">
<bold>6A</bold>
</xref>). Additionally, some contigs with low ALCR (&lt;0.5) and high SDR (&gt;0.5) may be the result of either horizontal gene transfer between mitochondrial and chloroplast genomes or a high proportion of repetitive sequences within the contigs. To improve the accuracy of detection, users can initially run Chlomito with lower filtering thresholds and then determine more precise filtering thresholds using the generated ALCR and SDR visualization scatter plot for the following run. The application of Chlomito will deepen our understanding of the complex interactions among mitochondrial, chloroplast, and nuclear genomes through HGT.</p>
<p>The organelle genome contaminants greatly affect chromosome-level genome assembly results. When chromosomes were assembled using all contigs without removing organelle genomes, organelle genome segments were erroneously inserted into chromosomes by genome assembly software (<xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>). Therefore, identifying and eliminating organelle genome contamination prior to chromosomal-level assembly are critical to ensure the fidelity of the assembly outcomes. The development of tools like Chlomito is important for improving the quality and reliability of chromosomal-level genome assembly in scientific research.</p>
<p>In summary, the development of Chlomito offers a precise and efficient approach for detecting and filtering organelle DNA sequences from genome assembly contigs, which significantly contributes to enhancing the quality of chromosome assembly. Furthermore, as Chlomito is capable of effectively distinguishing genuine organelle genome sequences from those that have been integrated into the nuclear genome via HGT, it will facilitate broad investigation into the mechanisms of genetic exchange between chromosomal and organelle genomes across a wide range of species in the future, offering new insights into the dynamic changes and evolutionary processes of organelle genomes.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>To validate the accuracy of the Chlomito software in detecting organelle genome sequences, we utilized sequencing data of Mango and Plum from the NCBI Bioproject database, with accession numbers PRJNA487154 and PRJNA574159, respectively. The raw sequencing data for the PacBio HiFi reads and Illumina short reads of <italic>Arabidopsis</italic> were obtained from the National Genomics Data Center, Beijing Institute of Genomics, Chinese Academy of Sciences/China National Center for Bioinformation (GSA: CRA004538). These datasets include high-quality second- and third-generation sequencing data, which were utilized for organelle genome identification and chromosome-level genome assembly.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>WS: Conceptualization, Software, Visualization, Writing &#x2013; original draft. CL: Methodology, Software, Visualization, Writing &#x2013; original draft. YL: Writing &#x2013; original draft, Visualization. DS: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. YJ: Software, Writing &#x2013; original draft. YH: Supervision, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. WP: Conceptualization, Funding acquisition, Supervision, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. HJ: Conceptualization, Funding acquisition, Supervision, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing.</p>
</sec>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. The work was supported by the General Program of National Natural Science Foundation of China (31970622) and by the Fundamental Research Funds for the Central Universities.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We thank the Biological and Medical Engineering Core Facilities of Beijing Institute of Technology for supporting research equipment. We thank two reviewers for constructive comments on an earlier version of this manuscript.</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2024.1430443/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2024.1430443/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Image1.jpeg" id="SF1" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;1</label>
<caption>
<p>High collinearity between GetOrganelle-assembled chloroplast genomes and NCBI reference genomes. The results from Mango <bold>(A)</bold> and Plum <bold>(B)</bold> are shown.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image2.jpeg" id="SF2" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;2</label>
<caption>
<p>Annotation and comparison of chloroplast genomes of Mango, Plum, <italic>Arabidopsis</italic>, and <italic>Zea mays</italic>. The chloroplast genomes of these species exhibit high conservation in gene contents and order.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image3.jpeg" id="SF3" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;3</label>
<caption>
<p>Analysis of contigs with chloroplast reference genome alignments in Mango <bold>(A)</bold> and Plum <bold>(B)</bold> samples. The alignment length coverage ratio (ALCR, blue bars), sequencing depth ratio (SDR, orange bars), and contig alignment lengths with chloroplast reference genomes are shown for contigs with varying alignment lengths.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image4.jpeg" id="SF4" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;4</label>
<caption>
<p>Collinearity comparison of mitochondrial genomes of Mango <bold>(A)</bold> and Plum <bold>(B)</bold> assembled by GetOrganelle with mitochondrial reference genomes from NCBI.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image5.jpeg" id="SF5" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;5</label>
<caption>
<p>Annotation and comparison of mitochondrial genomes of Mango, Plum, <italic>Arabidopsis</italic>, and <italic>Zea mays</italic>. In contrast to the conserved chloroplast genomes (<xref ref-type="supplementary-material" rid="SF2">
<bold>Supplementary Figure S2</bold>
</xref>), the mitochondrial genomes of these species show significant variations in gene content and gene order.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image6.jpeg" id="SF6" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;6</label>
<caption>
<p>Analysis of contigs with mitochondrial reference genome alignments in Mango <bold>(A)</bold> and Plum <bold>(B)</bold> samples. The alignment length coverage ratio (ALCR, blue bars), sequencing depth ratio (SDR, orange bars), and contig alignment lengths with mitochondrial reference genomes are shown for contigs with varying alignment lengths.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image7.jpeg" id="SF7" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;7</label>
<caption>
<p>Chlomito accurately identifies chloroplast-derived contigs and validates their collinearity with the chloroplast reference genome from <italic>Arabidopsis</italic>. <bold>(A)</bold> Identification of chloroplast-derived contigs in <italic>Arabidopsis</italic> assembly based on ALCR and SDR metrics. <bold>(B)</bold> Alignment lengths of <italic>Arabidopsis</italic> contigs with chloroplast genomes assembled using GetOrganelle and downloaded from the NCBI database. <bold>(C)</bold> Collinearity analysis of <italic>Arabidopsis</italic> contigs identified by Chlomito against the published <italic>Arabidopsis</italic> chloroplast reference genome.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Table1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
<label>Supplementary Table&#xa0;1</label>
<caption>
<p>Repetitive sequences identified using RepeatMasker in the Plum contigs with low ALCR and high SDR.</p>
</caption>
</supplementary-material>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Allio</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Schomaker-Bastos</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Romiguier</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Prosdocimi</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Nabholz</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Delsuc</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>MitoFinder: Efficient automated large-scale extraction of mitogenomic data in target enrichment phylogenomics</article-title>. <source>Mol. Ecol. Resour</source>. <volume>20</volume>, <fpage>892</fpage>&#x2013;<lpage>905</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/1755-0998.13160</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bae</surname> <given-names>E. K.</given-names>
</name>
<name>
<surname>Kang</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>E. J.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>K. T.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Chromosome-level genome assembly of the Asian aspen Populus davidiana Dode</article-title>. <source>Sci. Data</source> <volume>10</volume>, <fpage>431</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41597-023-02350-5</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>A chromosomal-scale genome assembly of modern cultivated hybrid sugarcane provides insights into origination and evolution</article-title>. <source>Nat. Commun.</source> <volume>15</volume>, <fpage>3041</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-024-47390-6</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bendich</surname> <given-names>A. J.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Mitochondrial DNA, chloroplast DNA and the origins of development in eukaryotic organisms</article-title>. <source>Biol. Direct</source> <volume>5</volume>, <elocation-id>42</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1745-6150-5-42</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cecchin</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Marcolungo</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Rossato</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Girolomoni</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Cosentino</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Cuine</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Chlorella vulgaris genome assembly and annotation reveals the molecular basis for metabolic acclimation to high light conditions</article-title>. <source>Plant J.</source> <volume>100</volume>, <fpage>1289</fpage>&#x2013;<lpage>1305</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/tpj.14508</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cui</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Comparative analysis of nuclear, chloroplast, and mitochondrial genomes of watermelon and melon provides evidence of gene transfer</article-title>. <source>Sci. Rep.</source> <volume>11</volume>, <fpage>1595</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-020-80149-9</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Genomic analysis based on chromosome-level genome assembly reveals an expansion of terpene biosynthesis of Azadirachta indica</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.853861</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eddy</surname> <given-names>S. R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Accelerated profile HMM searches</article-title>. <source>PloS Comput. Biol.</source> <volume>7</volume>, <elocation-id>e1002195</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pcbi.1002195</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fields</surname> <given-names>P. D.</given-names>
</name>
<name>
<surname>Waneka</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Naish</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Schatz</surname> <given-names>M. C.</given-names>
</name>
<name>
<surname>Henderson</surname> <given-names>I. R.</given-names>
</name>
<name>
<surname>Sloan</surname> <given-names>D. B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Complete sequence of a 641-kb insertion of mitochondrial DNA in the Arabidopsis thaliana nuclear genome</article-title>. <source>Genome Biol. Evol.</source> <volume>14</volume> (<issue>5</issue>). doi:&#xa0;<pub-id pub-id-type="doi">10.1093/gbe/evac059</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giorgashvili</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Reichel</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Caswara</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Kerimov</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Borsch</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Gruenstaeudl</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Software choice and sequencing coverage can impact plastid genome assembly-A case study in the narrow endemic Calligonum bakuense</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.779830</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goodwin</surname> <given-names>S.</given-names>
</name>
<name>
<surname>McPherson</surname> <given-names>J. D.</given-names>
</name>
<name>
<surname>McCombie</surname> <given-names>W. R.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Coming of age: ten years of next-generation sequencing technologies</article-title>. <source>Nat. Rev. Genet.</source> <volume>17</volume>, <fpage>333</fpage>&#x2013;<lpage>351</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nrg.2016.49</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>McCarthy</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Wood</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Howe</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Durbin</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identifying and removing haplotypic duplication in primary genome assemblies</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>2896</fpage>&#x2013;<lpage>2898</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btaa025</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hao</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zong</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Chromosome-level genomes of three key Allium crops and their trait evolution</article-title>. <source>Nat. Genet.</source> <volume>55</volume>, <fpage>1976</fpage>&#x2013;<lpage>1986</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41588-023-01546-0</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Howe</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Chow</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Collins</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Pelan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Pointon</surname> <given-names>D. L.</given-names>
</name>
<name>
<surname>Sims</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Significantly improving the quality of genome assemblies through curation</article-title>. <source>Gigascience</source> <volume>10</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>9</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/gigascience/giaa153</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jackman</surname> <given-names>S. D.</given-names>
</name>
<name>
<surname>Vandervalk</surname> <given-names>B. P.</given-names>
</name>
<name>
<surname>Mohamadi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Chu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yeo</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Hammond</surname> <given-names>S. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>ABySS 2.0: resource-efficient assembly of large genomes using a Bloom filter</article-title>. <source>Genome Res.</source> <volume>27</volume>, <fpage>768</fpage>&#x2013;<lpage>777</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/gr.214346.116</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jain</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Koren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Miga</surname> <given-names>K. H.</given-names>
</name>
<name>
<surname>Quick</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Rand</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Sasani</surname> <given-names>T. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Nanopore sequencing and assembly of a human genome with ultra-long reads</article-title>. <source>Nat. Biotechnol.</source> <volume>36</volume>, <fpage>338</fpage>&#x2013;<lpage>345</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nbt.4060</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jin</surname> <given-names>J. J.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>W. B.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J. B.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>dePamphilis</surname> <given-names>C. W.</given-names>
</name>
<name>
<surname>Yi</surname> <given-names>T. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>GetOrganelle: a fast and versatile toolkit for accurate <italic>de novo</italic> assembly of organelle genomes</article-title>. <source>Genome Biol.</source> <volume>21</volume>, <fpage>241</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13059-020-02154-5</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kenny</surname> <given-names>N. J.</given-names>
</name>
<name>
<surname>McCarthy</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Dudchenko</surname> <given-names>O.</given-names>
</name>
<name>
<surname>James</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Betteridge</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Corton</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>The gene-rich genome of the scallop Pecten maximus</article-title>. <source>Gigascience</source> <volume>9</volume> (<issue>5</issue>), <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/gigascience/giaa037</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kent</surname> <given-names>W. J.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>BLAT&#x2014;the BLAST-like alignment tool</article-title>. <source>Genome Res.</source> <volume>12</volume>, <fpage>656</fpage>&#x2013;<lpage>664</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/gr.229202</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kolmogorov</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Pevzner</surname> <given-names>P. A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Assembly of long, error-prone reads using repeat graphs</article-title>. <source>Nat. Biotechnol.</source> <volume>37</volume>, <fpage>540</fpage>&#x2013;<lpage>546</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41587-019-0072-8</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kubo</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Newton</surname> <given-names>K. J.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Angiosperm mitochondrial genomes and mutations</article-title>. <source>Mitochondrion</source> <volume>8</volume>, <fpage>5</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.mito.2007.10.006</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langmead</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Salzberg</surname> <given-names>S. L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Fast gapped-read alignment with Bowtie 2</article-title>. <source>Nat. Methods</source> <volume>9</volume>, <fpage>357</fpage>&#x2013;<lpage>359</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Laslett</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Canback</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide sequences</article-title>. <source>Nucleic Acids Res.</source> <volume>32</volume>, <fpage>11</fpage>&#x2013;<lpage>16</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkh152</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Minimap2: pairwise alignment for nucleotide sequences</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>3094</fpage>&#x2013;<lpage>3100</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/bty191</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Handsaker</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Wysoker</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Fennell</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Ruan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Homer</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>2078</fpage>&#x2013;<lpage>2079</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Advancing organelle genome transformation and editing for crop improvement</article-title>. <source>Plant Commun.</source> <volume>2</volume>, <elocation-id>100141</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.xplc.2021.100141</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Hao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Chromosome-level draft genome of a diploid plum (Prunus salicina)</article-title>. <source>Gigascience</source> <volume>9</volume> (<issue>12</issue>), <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/gigascience/giaa130</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lohse</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Drechsel</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Kahlau</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Bock</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>OrganellarGenomeDRAW&#x2014;a suite of tools for generating physical maps of plastid and mitochondrial genomes and visualizing expression data sets</article-title>. <source>Nucleic Acids Res.</source> <volume>41</volume>, <fpage>W575</fpage>&#x2013;<lpage>W581</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkt289</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lutz</surname> <given-names>K. A.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zdepski</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Michael</surname> <given-names>T. P.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Isolation and analysis of high quality nuclear DNA with reduced organellar DNA for plant genome sequencing and resequencing</article-title>. <source>BMC Biotechnol.</source> <volume>11</volume>, <elocation-id>54</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1472-6750-11-54</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marcais</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Kingsford</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>A fast, lock-free approach for efficient parallel counting of occurrences of k-mers</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>764</fpage>&#x2013;<lpage>770</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btr011</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martin</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Nguyen</surname> <given-names>M. K.</given-names>
</name>
<name>
<surname>Lowack</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Heider</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>ODNA: identification of organellar DNA by machine learning</article-title>. <source>Bioinformatics</source> <volume>39</volume> (<issue>5</issue>), <fpage>btad326</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btad326</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martin</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Gene transfer from organelles to the nucleus: frequent and in big chunks</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>100</volume>, <fpage>8612</fpage>&#x2013;<lpage>8614</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1073/pnas.1633606100</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mishra</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ulaszewski</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Meger</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Aury</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Bodenes</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Lesur-Kupin</surname> <given-names>I.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>A chromosome-level genome assembly of the European beech (<italic>Fagus sylvatica</italic>) reveals anomalies for organelle DNA integration, repeat content and distribution of SNPs</article-title>. <source>Front. Genet.</source> <volume>12</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fgene.2021.691058</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nath</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Fletcher</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Hayward</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Shaw</surname> <given-names>L. M.</given-names>
</name>
<name>
<surname>Masouleh</surname> <given-names>A. K.</given-names>
</name>
<name>
<surname>Furtado</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>A haplotype resolved chromosomal level avocado genome allows analysis of novel avocado genes</article-title>. <source>Hortic. Res.</source> <volume>9</volume>, <elocation-id>uhac157</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/hr/uhac157</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oldenburg</surname> <given-names>D. J.</given-names>
</name>
<name>
<surname>Bendich</surname> <given-names>A. J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>DNA maintenance in plastids and mitochondria of plants</article-title>. <source>Front. Plant Sci.</source> <volume>6</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2015.00883</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Preuten</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Cincu</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Fuchs</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zoschke</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Liere</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Borner</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Fewer genes than organelles: extremely low and variable gene copy numbers in mitochondria of somatic plant cells</article-title>. <source>Plant J.</source> <volume>64</volume>, <fpage>948</fpage>&#x2013;<lpage>959</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1365-313X.2010.04389.x</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pyke</surname> <given-names>K. A.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Plastid division and development</article-title>. <source>Plant Cell</source> <volume>11</volume>, <fpage>549</fpage>&#x2013;<lpage>556</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1105/tpc.11.4.549</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quinlan</surname> <given-names>A. R.</given-names>
</name>
<name>
<surname>Hall</surname> <given-names>I. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>BEDTools: a flexible suite of utilities for comparing genomic features</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>841</fpage>&#x2013;<lpage>842</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btq033</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rhie</surname> <given-names>A.</given-names>
</name>
<name>
<surname>McCarthy</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Fedrigo</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Damas</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Formenti</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Koren</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Towards complete and error-free genome assemblies of all vertebrate species</article-title>. <source>Nature</source> <volume>592</volume>, <fpage>737</fpage>&#x2013;<lpage>746</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41586-021-03451-0</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rhoads</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Au</surname> <given-names>K. F.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>PacBio sequencing and its applications</article-title>. <source>Genomics Proteomics Bioinf.</source> <volume>13</volume>, <fpage>278</fpage>&#x2013;<lpage>289</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.gpb.2015.08.002</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Samaniego Castruita</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Zepeda Mendoza</surname> <given-names>M. L.</given-names>
</name>
<name>
<surname>Barnett</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wales</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Gilbert</surname> <given-names>M. T.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Odintifier&#x2013;A computational method for identifying insertions of organellar origin from modern and ancient high-throughput sequencing data based on haplotype phasing</article-title>. <source>BMC Bioinf.</source> <volume>16</volume>, <fpage>232</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12859-015-0682-1</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sandhya</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Srivastava</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Kaila</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Tyagi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gaikwad</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Methods and tools for plant organelle genome sequencing, assembly, and downstream analysis</article-title>. <source>Methods Mol. Biol.</source> <volume>2107</volume>, <fpage>49</fpage>&#x2013;<lpage>98</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-1-0716-0235-5_4</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sanita Lima</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Woods</surname> <given-names>L. C.</given-names>
</name>
<name>
<surname>Cartwright</surname> <given-names>M. W.</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>D. R.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>The (in)complete organelle genome: exploring the use and nonuse of available technologies for characterizing mitochondrial and plastid chromosomes</article-title>. <source>Mol. Ecol. Resour.</source> <volume>16</volume>, <fpage>1279</fpage>&#x2013;<lpage>1286</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/1755-0998.12585</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Servant</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Varoquaux</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Lajoie</surname> <given-names>B. R.</given-names>
</name>
<name>
<surname>Viara</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Vert</surname> <given-names>J. P.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). <article-title>HiC-Pro: an optimized and flexible pipeline for Hi-C data processing</article-title>. <source>Genome Biol.</source> <volume>16</volume>, <fpage>259</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13059-015-0831-x</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shirasawa</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Itai</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Isobe</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Chromosome-scale genome assembly of Japanese pear (<italic>Pyrus pyrifolia</italic>) variety 'Nijisseiki'</article-title>. <source>DNA Res.</source> <volume>28</volume> (<issue>2</issue>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/dnares/dsab001</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sikorskaite</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Rajamaki</surname> <given-names>M. L.</given-names>
</name>
<name>
<surname>Baniulis</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Stanys</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Valkonen</surname> <given-names>J. P.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Protocol: Optimised methodology for isolation of nuclei from leaves of species in the Solanaceae and Rosaceae families</article-title>. <source>Plant Methods</source> <volume>9</volume>, <elocation-id>31</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1746-4811-9-31</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ye</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Chrom-pro: A user-friendly toolkit for <italic>de-novo</italic> chromosome assembly and genomic analysis</article-title>. <source>bioRxiv</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/2024.03.02.583079</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tillich</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Lehwark</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Pellizzer</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Ulbricht-Jones</surname> <given-names>E. S.</given-names>
</name>
<name>
<surname>Fischer</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bock</surname> <given-names>R.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>GeSeq - versatile and accurate annotation of organelle genomes</article-title>. <source>Nucleic Acids Res.</source> <volume>45</volume>, <fpage>W6</fpage>&#x2013;<lpage>W11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkx391</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Timmis</surname> <given-names>J. N.</given-names>
</name>
<name>
<surname>Ayliffe</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>C. Y.</given-names>
</name>
<name>
<surname>Martin</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Endosymbiotic gene transfer: organelle genomes forge eukaryotic chromosomes</article-title>. <source>Nat. Rev. Genet.</source> <volume>5</volume>, <fpage>123</fpage>&#x2013;<lpage>135</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nrg1271</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vaser</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sovic</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Nagarajan</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Sikic</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Fast and accurate <italic>de novo</italic> genome assembly from long uncorrected reads</article-title>. <source>Genome Res.</source> <volume>27</volume>, <fpage>737</fpage>&#x2013;<lpage>746</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/gr.214270.116</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vurture</surname> <given-names>G. W.</given-names>
</name>
<name>
<surname>Sedlazeck</surname> <given-names>F. J.</given-names>
</name>
<name>
<surname>Nattestad</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Underwood</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Fang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Gurtowski</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>GenomeScope: fast reference-free genome profiling from short reads</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>2202</fpage>&#x2013;<lpage>2204</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btx153</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Walker</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Abeel</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Shea</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Priest</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Abouelliel</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sakthikumar</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2014</year>). <article-title>Pilon: an integrated tool for comprehensive microbial variant detection and genome assembly improvement</article-title>. <source>PLoS One</source> <volume>9</volume>, <elocation-id>e112963</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0112963</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Dang</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>High-quality <italic>Arabidopsis thaliana</italic> genome assembly with nanopore and HiFi long reads</article-title>. <source>Genomics Proteomics Bioinf.</source> <volume>20</volume>, <fpage>4</fpage>&#x2013;<lpage>13</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.gpb.2021.08.003</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Kan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Liao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Tembrock</surname> <given-names>L. R.</given-names>
</name>
<name>
<surname>Daniell</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Plant organellar genomes: much done, much more to do</article-title>. <source>Trends Plant Sci.</source> <volume>29</volume> (<issue>7</issue>), <fpage>754</fpage>&#x2013;<lpage>769</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.tplants.2023.12.014</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Dang</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>The genome evolution and domestication of tropical fruit mango</article-title>. <source>Genome Biol.</source> <volume>21</volume>, <fpage>60</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13059-020-01959-8</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Rohlsen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Bi</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Organellar genome assembly methods and comparative analysis of horticultural plants</article-title>. <source>Hortic. Res.</source> <volume>5</volume>, <elocation-id>3</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41438-017-0002-1</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Schon</surname> <given-names>K. R.</given-names>
</name>
<name>
<surname>Elgar</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Orioli</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Tanguy</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Giess</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Nuclear-embedded mitochondrial DNA sequences in 66,083 human genomes</article-title>. <source>Nature</source> <volume>611</volume>, <fpage>105</fpage>&#x2013;<lpage>114</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41586-022-05288-7</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Peters</surname> <given-names>B. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>TGS-GapCloser: A fast and accurate gap closer for large genomes with low coverage of error-prone long reads</article-title>. <source>Gigascience</source> <volume>9</volume> (<issue>9</issue>), <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/gigascience/giaa094</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Bao</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Chromosome-level genome of spider <italic>Pardosa pseudoannulata</italic> and cuticle protein genes in environmental stresses</article-title>. <source>Sci. Data</source> <volume>11</volume>, <fpage>121</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41597-024-02966-1</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hua</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ruan</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Chromosome-level genome assembly and annotation of the yellow grouper, <italic>Epinephelus awoara</italic></article-title>. <source>Sci. Data</source> <volume>11</volume>, <fpage>151</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41597-024-02989-8</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Ming</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Assembly of allele-aware, chromosomal-scale autopolyploid genomes based on Hi-C data</article-title>. <source>Nat. Plants</source> <volume>5</volume>, <fpage>833</fpage>&#x2013;<lpage>845</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41477-019-0487-8</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="thesis">
<person-group person-group-type="author">
<name>
<surname>Zhong</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). <source>Assembly, annotation and analysis of chloroplast genomes</source>. <publisher-loc>Perth, Western Australia, Australia</publisher-loc>: <publisher-name>The University of Western Australia</publisher-name>. Available online at: <uri xlink:href="https://research-repository.uwa.edu.au/en/publications/assembly-annotation-and-analysis-of-chloroplast-genomes">https://research-repository.uwa.edu.au/en/publications/assembly-annotation-and-analysis-of-chloroplast-genomes</uri>.</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Armijos</surname> <given-names>C. E.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Ruhlman</surname> <given-names>T. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>a). <article-title>Plastid genome assembly using long-read data</article-title>. <source>Mol. Ecol. Resour.</source> <volume>23</volume>, <fpage>1442</fpage>&#x2013;<lpage>1457</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/1755-0998.13787</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zuo</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>b). <article-title>Chromosome-level genome assembly of <italic>Niphotrichum japonicum</italic> provides new insights into heat stress responses in mosses</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1271357</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>Z. T.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Martin</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Three chromosome-level duck genome assemblies provide insights into genomic variation during domestication</article-title>. <source>Nat. Commun.</source> <volume>12</volume>, <fpage>5932</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-021-26272-1</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>