<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2021.766389</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Centromere-Specific Retrotransposons and Very-Long-Chain Fatty Acid Biosynthesis in the Genome of Yellowhorn (<italic>Xanthoceras sorbifolium</italic>, Sapindaceae), an Oil-Producing Tree With Significant Drought Resistance</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Liu</surname> <given-names>Hui</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1498628/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Yan</surname> <given-names>Xue-Mei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1542845/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Xin-rui</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1480181/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Dong-Xu</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhou</surname> <given-names>Qingyuan</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Shi</surname> <given-names>Tian-Le</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Jia</surname> <given-names>Kai-Hua</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Tian</surname> <given-names>Xue-Chan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhou</surname> <given-names>Shan-Shan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Ren-Gang</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Yun</surname> <given-names>Quan-Zheng</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Qing</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1502301/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Xiang</surname> <given-names>Qiuhong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Mannapperuma</surname> <given-names>Chanaka</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/601948/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Van Zalen</surname> <given-names>Elena</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Street</surname> <given-names>Nathaniel R.</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/604625/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Porth</surname> <given-names>Ilga</given-names></name>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/722719/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>El-Kassaby</surname> <given-names>Yousry A.</given-names></name>
<xref ref-type="aff" rid="aff8"><sup>8</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/344616/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhao</surname> <given-names>Wei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff9"><sup>9</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Xiao-Ru</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff9"><sup>9</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Guan</surname> <given-names>Wenbin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x002A;</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Mao</surname> <given-names>Jian-Feng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/355122/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>National Engineering Laboratory for Tree Breeding, Beijing Advanced Innovation Center for Tree Breeding by Molecular Design, Key Laboratory of Genetics and Breeding in Forest Trees and Ornamental Plants, Ministry of Education, School of Ecology and Nature Conservation, College of Biological Sciences and Technology, Beijing Forestry University</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Protected Agricultural Technology, R&#x0026;D Center, Shanxi Datong University</institution>, <addr-line>Datong</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Key Laboratory of Plant Resources, Institute of Botany, Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Bioinformatics, Ori (Shandong) Gene Science and Technology Co., Ltd.</institution>, <addr-line>Weifang</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>Key Laboratory of Forest Ecology and Environment of the National Forestry and Grassland Administration, Research Institute of Forest Ecology, Environment and Protection, Chinese Academy of Forestry</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff6"><sup>6</sup><institution>Ume&#x00E5; Plant Science Centre, Department of Plant Physiology, Ume&#x00E5; University</institution>, <addr-line>Ume&#x00E5;</addr-line>, <country>Sweden</country></aff>
<aff id="aff7"><sup>7</sup><institution>D&#x00E9;partement des Sciences du Bois et de la For&#x00EA;t, Facult&#x00E9; de Foresterie, de G&#x00E9;ographie et de G&#x00E9;omatique, Universit&#x00E9; Laval Qu&#x00E9;bec</institution>, <addr-line>Quebec City, QC</addr-line>, <country>Canada</country></aff>
<aff id="aff8"><sup>8</sup><institution>Department of Forest and Conservation Sciences, Faculty of Forestry, University of British Columbia</institution>, <addr-line>Vancouver, BC</addr-line>, <country>Canada</country></aff>
<aff id="aff9"><sup>9</sup><institution>Department of Ecology and Environmental Science, Ume&#x00E5; Plant Science Centre, Ume&#x00E5; University</institution>, <addr-line>Ume&#x00E5;</addr-line>, <country>Sweden</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Pawel Wojciechowski, Pozna&#x0144; University of Technology, Poland</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Liangsheng Zhang, Zhejiang University, China; Guiling Sun, Henan University, China</p></fn>
<corresp id="c001">&#x002A;Correspondence: Jian-Feng Mao, <email>jianfeng.mao@bjfu.edu.cn</email></corresp>
<corresp id="c002">Wenbin Guan, <email>swlab@bjfu.edu.cn</email></corresp>
<fn fn-type="equal" id="fn002"><p><sup>&#x2020;</sup>These authors have contributed equally to this work</p></fn>
<fn fn-type="other" id="fn004"><p>This article was submitted to Plant Systems and Synthetic Biology, a section of the journal Frontiers in Plant Science</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>11</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>766389</elocation-id>
<history>
<date date-type="received">
<day>02</day>
<month>09</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>18</day>
<month>10</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2021 Liu, Yan, Wang, Zhang, Zhou, Shi, Jia, Tian, Zhou, Zhang, Yun, Wang, Xiang, Mannapperuma, Van Zalen, Street, Porth, El-Kassaby, Zhao, Wang, Guan and Mao.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Liu, Yan, Wang, Zhang, Zhou, Shi, Jia, Tian, Zhou, Zhang, Yun, Wang, Xiang, Mannapperuma, Van Zalen, Street, Porth, El-Kassaby, Zhao, Wang, Guan and Mao</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>In-depth genome characterization is still lacking for most biofuel crops, especially for centromeres, which play a fundamental role during nuclear division and in the maintenance of genome stability. This study applied long-read sequencing technologies to assemble a highly contiguous genome for yellowhorn (<italic>Xanthoceras sorbifolium</italic>), an oil-producing tree, and conducted extensive comparative analyses to understand centromere structure and evolution, and fatty acid biosynthesis. We produced a reference-level genome of yellowhorn, &#x223C;470 Mb in length with &#x223C;95% of contigs anchored onto 15 chromosomes. Genome annotation identified 22,049 protein-coding genes and 65.7% of the genome sequence as repetitive elements. Long terminal repeat retrotransposons (LTR-RTs) account for &#x223C;30% of the yellowhorn genome, which is maintained by a moderate birth rate and a low removal rate. We identified the centromeric regions on each chromosome and found enrichment of centromere-specific retrotransposons of LINE1 and <italic>Gypsy</italic> in these regions, which have evolved recently (&#x223C;0.7 MYA). We compared the genomes of three cultivars and found frequent inversions. We analyzed the transcriptomes from different tissues and identified the candidate genes involved in very-long-chain fatty acid biosynthesis and their expression profiles. Collinear block analysis showed that yellowhorn shared the gamma (&#x03B3;) hexaploidy event with <italic>Vitis vinifera</italic> but did not undergo any further whole-genome duplication. This study provides excellent genomic resources for understanding centromere structure and evolution and for functional studies in this important oil-producing plant.</p>
</abstract>
<kwd-group>
<kwd>yellowhorn</kwd>
<kwd>centromere</kwd>
<kwd>LINE1</kwd>
<kwd><italic>Gypsy</italic></kwd>
<kwd>very-long-chain fatty acid</kwd>
</kwd-group>
<counts>
<fig-count count="5"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="123"/>
<page-count count="17"/>
<word-count count="13487"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1" sec-type="intro">
<title>Introduction</title>
<p>Centromeres are those chromosomal regions that interact with spindle microtubules for the correct segregation of sister chromatids during mitosis and meiosis II, and of homologous chromosomes during meiosis I in eukaryotes (<xref ref-type="bibr" rid="B41">Houben and Schubert, 2003</xref>). Despite the early cytological discovery and rapid growth in the number of sequenced genomes, centromeres have been one of the rather mysterious parts of genomes due to their highly repetitive content. Their function in chromosome segregation is highly conserved among species, but the sequences specific to centromeric chromatin are evolving rapidly, which is referred to as the centromere paradox (<xref ref-type="bibr" rid="B40">Henikoff et al., 2001</xref>). Satellite DNA is one of the dominant centromeric sequences in most species (<xref ref-type="bibr" rid="B20">Csink and Henikoff, 1998</xref>). Additionally, centromeric retrotransposons are commonly found in the centromeres of <italic>Triticum boeoticum</italic> and <italic>Zea mays</italic> (<xref ref-type="bibr" rid="B123">Zhong et al., 2002</xref>; <xref ref-type="bibr" rid="B67">Liu et al., 2008</xref>). In maize, centromeric retrotransposons include a lineage of <italic>Gypsy</italic> retrotransposons (<xref ref-type="bibr" rid="B83">Neumann et al., 2011</xref>) while, in <italic>Musa acuminata</italic>, they are dominated by long interspersed nuclear elements (LINE) and <italic>Gypsy</italic> (<xref ref-type="bibr" rid="B22">D&#x2019;Hont et al., 2012</xref>; <xref ref-type="bibr" rid="B17">&#x010C;&#x00ED;&#x017E;kov&#x00E1; et al., 2013</xref>; <xref ref-type="bibr" rid="B4">Belser et al., 2021</xref>). The few available reports illustrate that sequence composition in centromeres can be complex and vary among species. However, our understanding of centromere structure, sequence composition, and the mode and the rate of evolution is thus far very limited.</p>
<p>Determining the precise boundaries of centromeres has proved to be difficult, especially for the repeat-rich plant genomes, creating challenges for complete genome assembly (<xref ref-type="bibr" rid="B52">Kumar and Bennetzen, 1999</xref>; <xref ref-type="bibr" rid="B40">Henikoff et al., 2001</xref>; <xref ref-type="bibr" rid="B31">Feschotte et al., 2002</xref>). The advance in long-read sequencing, such as Pacific Biosciences (PacBio) sequencing, and genome scaffolding methods, such as optical mapping and Hi-C sequencing, has vastly improved our ability to obtain unprecedented complete and contiguous genome assemblies (<xref ref-type="bibr" rid="B93">Sedlazeck et al., 2018</xref>). Long-read sequencing is also able to yield contiguous centromeric sequences and thus assemblies of centromeric regions despite their complex repeat structures (<xref ref-type="bibr" rid="B105">VanBuren et al., 2015</xref>; <xref ref-type="bibr" rid="B4">Belser et al., 2021</xref>). Based on the colocalization of centromeres and the patterns it creates in Hi-C contact maps, it is possible to infer the locations of all centromeres for all chromosomes in a genome (<xref ref-type="bibr" rid="B81">Mizuguchi et al., 2014</xref>; <xref ref-type="bibr" rid="B106">Varoquaux et al., 2015</xref>).</p>
<p>Yellowhorn (<italic>Xanthoceras sorbifolium</italic>) is a rare, deciduous tree or shrub in the Sapindaceae family and the only species in the genus <italic>Xanthoceras</italic> native to dryland in northern China (<xref ref-type="fig" rid="F1">Figures 1A&#x2013;D</xref>). This species has a high capacity for saline-alkali tolerance and withstands extreme temperatures. It is thus widely used for afforestation programs for soil and water conservation (<xref ref-type="bibr" rid="B121">Yu et al., 2017</xref>). The seeds of yellowhorn are rich in lipids, proteins, and saponins, with oil contents ranging from 49.8% to 68.3% and unsaturated fatty acids up to 90.9% of the total fatty acids (<xref ref-type="bibr" rid="B119">Yao et al., 2013</xref>; <xref ref-type="bibr" rid="B107">Venegas-Caler&#x00F3;n et al., 2017</xref>; <xref ref-type="bibr" rid="B121">Yu et al., 2017</xref>), and thus the plant has been identified as an important biofuel crop. Notably, nervonic acid, a very-long-chain fatty acid (VLCFA), which is rarely found in plants, accounts for 1.5&#x2013;3% of the seed oil of yellowhorn (<xref ref-type="bibr" rid="B92">Ruan et al., 2017</xref>). Nervonic acid is an important component in myelin biosynthesis in the central and peripheral nervous system and an essential nutrient for brain growth and maintenance (<xref ref-type="bibr" rid="B86">Oda et al., 2005</xref>; <xref ref-type="bibr" rid="B1">Amminger et al., 2012</xref>). The increase of nervonic acid content in seeds will become an important target for yellowhorn breeding.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Images of yellowhorn and its potential distribution range. <bold>(A)</bold> The mature tree (&#x201C;JGXP&#x201D;) sampled for genome sequencing. <bold>(B)</bold> Raceme, hermaphrodite flower (up), and male flower (down). <bold>(C)</bold> Capsular fruits, seeds in ripe fruit, and cross-section of fruit. <bold>(D)</bold> Predicted distribution of yellowhorn based on sampled records and current climate data. Images at the bottom are the yellowhorn tree with flowers (left) and fruits (right), respectively.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-766389-g001.tif"/>
</fig>
<p>Previous morphometric analysis has determined the chromosome number of yellowhorn and the karyotype as 2n = 30 (<xref ref-type="bibr" rid="B54">Lang et al., 1980</xref>). Recently, two long-read-based genome assemblies have been reported for yellowhorn, each representing a different cultivar (<xref ref-type="table" rid="T1">Table 1</xref>; <xref ref-type="bibr" rid="B7">Bi et al., 2019</xref>; <xref ref-type="bibr" rid="B62">Liang et al., 2019</xref>). Genome analyses from these two studies show that yellowhorn diverged from its close sister species <italic>Dimocarpus longan</italic> at &#x223C;33 MYA to &#x223C;46 MYA (million years ago), and no whole-genome duplication event is detected in yellowhorn (<xref ref-type="bibr" rid="B7">Bi et al., 2019</xref>; <xref ref-type="bibr" rid="B62">Liang et al., 2019</xref>). However, the identification of centromere regions and their sequence characteristics, genome structural variations, and the biosynthesis of VLCFA were not investigated.</p>
<table-wrap position="float" id="T1">
<label>TABLE 1</label>
<caption><p>Statistics of the three yellowhorn assemblies of &#x201C;JGXP,&#x201D; &#x201C;ZS4,&#x201D; and &#x201C;WF18&#x201D;. N50, shortest sequence length at 50% of the genome.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td/>
<td valign="top" align="center">JGXP</td>
<td valign="top" align="center">ZS4</td>
<td valign="top" align="center">WF18</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Whole genome sequencing reads</td>
<td valign="top" align="center">PacBio and Illumina</td>
<td valign="top" align="center">PacBio and Illumina</td>
<td valign="top" align="left">PacBio, 10&#x00D7; Genomics, and Illumina</td>
</tr>
<tr>
<td valign="top" align="left">Scaffolding sequencing techniques</td>
<td valign="top" align="center">Hi-C</td>
<td valign="top" align="center">Hi-C</td>
<td valign="top" align="left">Hi-C and BioNano optical maps</td>
</tr>
<tr>
<td valign="top" align="left">Estimated genome size (Mb)</td>
<td valign="top" align="center">435<xref ref-type="table-fn" rid="t1fna"><sup>a</sup></xref></td>
<td valign="top" align="center">526<xref ref-type="table-fn" rid="t1fnc"><sup>c</sup></xref>/541<xref ref-type="table-fn" rid="t1fnc"><sup>c</sup></xref></td>
<td valign="top" align="left">434<xref ref-type="table-fn" rid="t1fnc"><sup>c</sup></xref>/442<xref ref-type="table-fn" rid="t1fna"><sup>d</sup></xref></td>
</tr>
<tr>
<td valign="top" align="left">Heterozygosity (%)</td>
<td valign="top" align="center">0.51<xref ref-type="table-fn" rid="t1fna"><sup>a</sup></xref>/0.38<xref ref-type="table-fn" rid="t1fnb"><sup>b</sup></xref></td>
<td valign="top" align="center">0.75<xref ref-type="table-fn" rid="t1fna"><sup>d</sup></xref></td>
<td valign="top" align="left">0.81<xref ref-type="table-fn" rid="t1fna"><sup>d</sup></xref></td>
</tr>
<tr>
<td valign="top" align="left">Number of chromosomes</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">15</td>
<td valign="top" align="left">15</td>
</tr>
<tr>
<td valign="top" align="left">Assembled genome size (Mb)</td>
<td valign="top" align="center">470</td>
<td valign="top" align="center">504</td>
<td valign="top" align="left">440</td>
</tr>
<tr>
<td valign="top" align="left">Anchored size (Mb)</td>
<td valign="top" align="center">446 (94.9%)</td>
<td valign="top" align="center">489 (97.0%)</td>
<td valign="top" align="left">420 (95.4%)</td>
</tr>
<tr>
<td valign="top" align="left">Number of scaffolds</td>
<td valign="top" align="center">988</td>
<td valign="top" align="center">2,297</td>
<td valign="top" align="left">267</td>
</tr>
<tr>
<td valign="top" align="left">N50 of scaffolds (Mb)</td>
<td valign="top" align="center">30.8</td>
<td valign="top" align="center">32.2</td>
<td valign="top" align="left">29.4</td>
</tr>
<tr>
<td valign="top" align="left">Number of contigs</td>
<td valign="top" align="center">3,302</td>
<td valign="top" align="center">2,836</td>
<td valign="top" align="left">2,002</td>
</tr>
<tr>
<td valign="top" align="left">N50 of contigs (Mb)</td>
<td valign="top" align="center">0.42</td>
<td valign="top" align="center">1.04</td>
<td valign="top" align="left">0.64</td>
</tr>
<tr>
<td valign="top" align="left">GC content (%)</td>
<td valign="top" align="center">34.94</td>
<td valign="top" align="center">36.95</td>
<td valign="top" align="left">32.75</td>
</tr>
<tr>
<td valign="top" align="left">Protein-coding genes</td>
<td valign="top" align="center">22,049</td>
<td valign="top" align="center">24,672</td>
<td valign="top" align="left">21,059/22,046<xref ref-type="table-fn" rid="t1fnb"><sup>b</sup></xref></td>
</tr>
<tr>
<td valign="top" align="left">TE proportion (%)</td>
<td valign="top" align="center">65.7</td>
<td valign="top" align="center">65.0</td>
<td valign="top" align="left">61.5</td>
</tr>
<tr>
<td valign="top" align="left">Complete BUSCOs</td>
<td valign="top" align="center">1,361 (94.5%)</td>
<td valign="top" align="center">1,364 (94.7%)</td>
<td valign="top" align="left">1,218 (84.6%)</td>
</tr>
<tr>
<td valign="top" align="left">LAI</td>
<td valign="top" align="center">14.53</td>
<td valign="top" align="center">12.89</td>
<td valign="top" align="left">14.00</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p><italic>LAI, LTR assembly index.</italic></p></fn>
<fn id="t1fna"><p><italic><sup>a,d</sup>Estimated by K-mer analysis using PacBio long reads and Illumina paired-end reads, respectively.</italic></p></fn>
<fn id="t1fnb"><p><italic><sup>b</sup>Estimated using Illumina paired-end reads and values are retrieved from the study (<xref ref-type="bibr" rid="B62">Liang et al., 2019</xref>).</italic></p></fn>
<fn id="t1fnc"><p><italic><sup>c</sup>Estimated by flow cytometry analysis.</italic></p></fn>
</table-wrap-foot>
</table-wrap>
<p>Here, we present a high-contiguity chromosome-level genome assembly for another cultivar of yellowhorn by combining PacBio long-reads and Hi-C scaffolding strategies. This high-quality genome assembly allowed us to identify the centromeric regions (note that the term &#x201C;centromeric&#x201D; is used in this study to refer to both the centromeric and pericentromeric regions, as these are difficult to distinguish from one another) for the 15 chromosomes and characterize their sequence composition and mode of evolution. We further conducted comparative genomic analyses among cultivars and transcriptome analyses to identify candidate genes of VLCFA biosynthesis. The genome resources and investigations presented here enrich our understanding of centromere genetics and promote efficient utilization of this precious bio-resource plant.</p>
</sec>
<sec id="S2" sec-type="results">
<title>Results</title>
<sec id="S2.SS1">
<title>Genome Sequencing and Assembly</title>
<p>A nationally certificated variety, &#x201C;Jinguanxiapei&#x201D; (&#x201C;JGXP&#x201D;) (<xref ref-type="fig" rid="F1">Figure 1A</xref>), was selected to generate &#x223C;60 Gb (&#x223C;120&#x00D7;) PacBio long reads, &#x223C;60 Gb (&#x223C;120&#x00D7;) Hi-C reads, and &#x223C;21 Gb (&#x223C;40&#x00D7;) Illumina paired-end reads (<xref ref-type="supplementary-material" rid="DS1">Supplementary Tables 1</xref>, <xref ref-type="supplementary-material" rid="DS1">2</xref>) for <italic>de novo</italic> genome assembly. The genome size and heterozygosity were estimated to be 435 Mb and 0.51%, respectively, based on 17-bp <italic>K</italic>-mers frequency analysis with corrected PacBio long reads (<xref ref-type="table" rid="T1">Table 1</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 1</xref>). The total assembly length of &#x201C;JGXP&#x201D; was 470 Mb with 988 scaffolds and a scaffold N50 of 30.8 Mb (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 3</xref>), of which 446.2 Mb (94.9%) was anchored to 15 chromosomes (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 2A</xref> and <xref ref-type="table" rid="T1">Table 1</xref>). This assembly of the &#x201C;JGXP&#x201D; genome was smaller than that of the previously reported cultivar &#x201C;ZS4&#x201D; genome (504 Mb) but larger than the cultivar &#x201C;WF18&#x201D; genome (440 Mb) (<xref ref-type="table" rid="T1">Table 1</xref>). We determined the homologous chromosomes among the three yellowhorn genomes based on shared synteny blocks (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 3</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 4</xref>). All the 15 chromosomes of the three genomes were in perfect 1:1 synteny (<xref ref-type="fig" rid="F2">Figure 2</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 3</xref>). 
We also generated the complete plastid (Pt) genome (152,643 bp, <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 4</xref>) and mitochondrial (Mt) genome (389,005 bp, <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 5</xref>) from the sequence data.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p>The genomic landscape across chromosomes among the three yellowhorn assemblies of &#x201C;JGXP,&#x201D; &#x201C;ZS4,&#x201D; and &#x201C;WF18.&#x201D; a: The tracks represent 15 assembled chromosomes for each genome of &#x201C;JGXP,&#x201D; &#x201C;ZS4,&#x201D; and &#x201C;WF18.&#x201D; The red rectangles in &#x201C;JGXP&#x201D; represent the centromeric regions. b&#x2013;f: The distribution of the gene density, LINE1 density, <italic>Gypsy</italic> density, <italic>Copia</italic> density, and GC content, respectively, with densities calculated in 100 Kb non-overlapping windows. g: The homologous chromosomes among three cultivars of yellowhorn. The chromosomes of &#x201C;JGXP&#x201D; are used as references.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-766389-g002.tif"/>
</fig>
<p>We evaluated the quality of the &#x201C;JGXP&#x201D; assembly by several criteria. First, a 94.5% complete BUSCOs score suggests high gene-space completeness of the assembly, which was similar to &#x201C;ZS4&#x201D; (94.7%) but higher than &#x201C;WF18&#x201D; (84.6%) (<xref ref-type="table" rid="T1">Table 1</xref>). Second, the LTR Assembly Index (LAI) (<xref ref-type="bibr" rid="B87">Ou et al., 2018</xref>), a standard for evaluating the assembly using long terminal repeat retrotransposons (LTR-RTs), was 14.53 for our assembly, which classifies it into the &#x201C;reference&#x201D; category (<xref ref-type="bibr" rid="B87">Ou et al., 2018</xref>; <xref ref-type="table" rid="T1">Table 1</xref>). Finally, 99.39% of PacBio long reads, 91.43% of the transcriptome, and 97.66% of Illumina paired-end reads were mapped to the yellowhorn genome (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 5</xref>).</p>
</sec>
<sec id="S2.SS2">
<title>Genome Annotation</title>
<p>A total of 22,049 high-confidence protein-coding genes were annotated, with 1,341 (93.1%) of complete core eukaryotic BUSCO genes covered (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 6</xref>). We identified 588 small ncRNA genes, 65 rRNA genes, and 708 tRNA genes (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 6</xref>). In addition, we identified a total of 16,386 pseudogenes, including 11,197 FRAGs (Fragment Pseudogenes), 4,120 DUPs (duplicated pseudogenes), and 1,069 PSSDs (retrotransposed pseudogenes) (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 6</xref>).</p>
<p>Nearly all (99.1%) of the protein-coding genes were functionally annotated by sequence and domain architecture similarity searches, with only 193 protein-encoding genes remaining completely uncharacterized (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 7</xref>). We identified 2,887 transcription factors (TFs), transcriptional regulators (TRs), and chromatin regulators (CRs) from 96 gene families in our &#x201C;JGXP&#x201D; assembly, including the major gene families of C2H2, CCHC (Zn), WD40-like, MYB, and PHD, which contained 457, 239, 236, 212, and 129 genes, respectively (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 8</xref>).</p>
<p>We identified 22,070 gene families among the three assemblies of yellowhorn, of which 50.9% (11,244) were core gene families (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 6</xref>). The genes of &#x201C;JGXP&#x201D; were clustered into 16,519 (74.8%) gene families, with 14,905 (67.6%) core genes, and only 1,046 (4.7%) private genes (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 6</xref>). We found more dispensable genes in &#x201C;JGXP&#x201D; and &#x201C;ZS4&#x201D; than in &#x201C;WF18&#x201D; (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 6</xref>).</p>
<p>We identified 65.67% of the &#x201C;JGXP&#x201D; assembly as repetitive sequences (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 9</xref>). LTR-RTs were the most abundant transposable elements (TE), representing 29.64% of the &#x201C;JGXP&#x201D; assembly (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 9</xref>). Among the LTR-RTs, <italic>Gypsy</italic> (16.83%) and <italic>Copia</italic> (11.88%) were predominant (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 9</xref>). LINEs represent 4.06% of the genome, and most of them are LINE1, which represent 3.79% of the &#x201C;JGXP&#x201D; assembly (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 9</xref>). DNA transposons and the uncharacterized category &#x201C;unknown&#x201D; constituted 5.62% and 24.27% of the &#x201C;JGXP&#x201D; assembly, respectively (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 9</xref>). TEs were unevenly distributed along the chromosomes of the &#x201C;JGXP&#x201D; assembly, tending to accumulate in the regions of a low density of genes and high GC content for each chromosome (<xref ref-type="fig" rid="F2">Figure 2</xref>). We re-annotated the repeat elements of the assemblies of &#x201C;ZS4&#x201D; and &#x201C;WF18&#x201D; using our annotation strategy. In general, the number and the length of each repeat element family were similar among the three assemblies of yellowhorn (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figures 7A&#x2013;C</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 9</xref>). 
However, five TE families, including LTR/Cassandra, LTR/DIRS, LINE/LINE1-Tx1, LINE/Penelope, and DNA/PiggyBac, were only present in our &#x201C;JGXP&#x201D; assembly, and 2,664 LTR/Ngaro elements were found in the &#x201C;JGXP&#x201D; assembly, while only 139 were found in the &#x201C;WF18&#x201D; assembly and none in the &#x201C;ZS4&#x201D; assembly (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 7C</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 9</xref>). To rule out processing artifacts, we further mapped PacBio long-reads from two accessions, &#x201C;JGXP&#x201D; and &#x201C;ZS4,&#x201D; to our &#x201C;JGXP&#x201D; assembly using minimap2 and checked whether the annotated TEs were supported at a mapping quality &#x003E; 30. We found that almost all of the six TE families, including the LTR/Ngaro elements mentioned above, were verified by PacBio long-reads from &#x201C;JGXP&#x201D; and &#x201C;ZS4&#x201D; (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 10</xref>). This suggests that these TEs were lost from the genome assemblies of &#x201C;ZS4&#x201D; and &#x201C;WF18&#x201D; during genome assembly, or that the TE annotation pipeline failed to recognize them.</p>
</sec>
<sec id="S2.SS3">
<title>Genome Structural Variation</title>
<p>We compared the genomes of cultivars &#x201C;JGXP,&#x201D; &#x201C;ZS4,&#x201D; and &#x201C;WF18&#x201D; and identified structural variations (inversions, translocations, and duplications) and sequence differences (SNPs, indels) using &#x201C;JGXP&#x201D; as the reference. Genome comparison showed that the three genomes were in general syntenic (<xref ref-type="fig" rid="F3">Figures 3A,C</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 3</xref>). The syntenic regions encompassed 241.5 Mb (51.4%, 3,652 regions) for &#x201C;JGXP <italic>vs.</italic> ZS4&#x201D; and 242.2 Mb (51.5%, 3,027 regions) for &#x201C;JGXP <italic>vs.</italic> WF18,&#x201D; and inversions were the main structural rearrangements, including 56.9 Mb (12.1%, 378 regions) for &#x201C;JGXP <italic>vs.</italic> ZS4&#x201D; and 51.4 Mb (10.9%, 426 regions) for &#x201C;JGXP <italic>vs.</italic> WF18&#x201D; (<xref ref-type="fig" rid="F3">Figures 3B,C</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 11</xref>). However, we detected 119.9 Mb &#x2013; 129.7 Mb (25.5&#x2013;27.6%) JGXP-specific regions relative to the other two cultivars (<xref ref-type="fig" rid="F3">Figure 3B</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 11</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p>Comparative analysis among the three yellowhorn assemblies of &#x201C;JGXP,&#x201D; &#x201C;ZS4,&#x201D; and &#x201C;WF18.&#x201D; <bold>(A)</bold> Structural variations between the reference &#x201C;JGXP&#x201D; and the other two cultivars of yellowhorn genomes. The chromosome in the query genome has been reverse complemented if the majority of alignments between homologous chromosomes were inverted. <bold>(B)</bold> Barplot showing the total length of structural variations. <bold>(C)</bold> Barplot showing the sequence differences in the structural variations of syntenic (upper) and rearranged (lower) regions for &#x201C;JGXP vs. ZS4&#x201D; and &#x201C;JGXP vs. WF18.&#x201D; <bold>(D)</bold> Size distributions of different types of structural variations.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-766389-g003.tif"/>
</fig>
<p>Structural variations were found distributed unevenly among the chromosomes (<xref ref-type="fig" rid="F3">Figure 3A</xref>). First, large fragments of structural variation were rare on chromosomes &#x201C;Chr05&#x201D; and &#x201C;Chr11,&#x201D; while they were abundant on &#x201C;Chr02,&#x201D; &#x201C;Chr06,&#x201D; and &#x201C;Chr14&#x201D; in &#x201C;JGXP.&#x201D; Second, large fragments of inversions were enriched in chromosome terminal ends. The size of inversion regions was larger than that of other structural variations, and the longest inversion was 8.1 Mb and found on &#x201C;Chr04&#x201D; (left end) of &#x201C;JGXP&#x201D; (<xref ref-type="fig" rid="F3">Figures 3A,D</xref>).</p>
</sec>
<sec id="S2.SS4">
<title>Centromere Identification</title>
<p>The most abundant tandem repeat is the centromeric sequences for most species (<xref ref-type="bibr" rid="B77">Melters et al., 2013</xref>). We did not find the tandem repeats detected from PacBio long reads enriched preferentially in specific regions along the 15 chromosomes (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 8</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Data File 1</xref>). We used Centurion (<xref ref-type="bibr" rid="B106">Varoquaux et al., 2015</xref>) with Hi-C data as an alternative approach to predict the centromeres in the yellowhorn genome. The centromere of each chromosome was predicted to a genomic point of one base pair (<xref ref-type="table" rid="T2">Table 2</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 2B</xref>). Based on the distribution of different TE families along the chromosomes, we noticed that LINE1 retrotransposons were enriched preferentially in narrow regions, and these regions highly matched the centromeres predicted by Centurion. These regions also contained a high density of <italic>Gypsy</italic> retrotransposons, <italic>Copia</italic> retrotransposons, and high GC content while a low density of genes (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F4">4A,B</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figures 2B, 9&#x2013;22</xref>). A similar pattern is also found in the other two yellowhorn genome assemblies (&#x201C;ZS4&#x201D; and &#x201C;WF18&#x201D;) (<xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<table-wrap position="float" id="T2">
<label>TABLE 2</label>
<caption><p>A summary of centromere regions and chromosome types for each chromosome.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Chromosome</td>
<td valign="top" align="center">Predicted position (bp)</td>
<td valign="top" align="center">Start (bp)</td>
<td valign="top" align="center">End (bp)</td>
<td valign="top" align="center">Size (Mb)</td>
<td valign="top" align="center">Arm ratio (<italic>r</italic>)</td>
<td valign="top" align="center">Term</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Chr01</td>
<td valign="top" align="center">11,336,700</td>
<td valign="top" align="center">10,700,001</td>
<td valign="top" align="center">12,100,000</td>
<td valign="top" align="center">1.4</td>
<td valign="top" align="center">2.36</td>
<td valign="top" align="center">sm</td>
</tr>
<tr>
<td valign="top" align="left">Chr02</td>
<td valign="top" align="center">18,486,900</td>
<td valign="top" align="center">17,600,001</td>
<td valign="top" align="center">19,200,000</td>
<td valign="top" align="center">1.6</td>
<td valign="top" align="center">1.02</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr03</td>
<td valign="top" align="center">14,643,700</td>
<td valign="top" align="center">13,500,001</td>
<td valign="top" align="center">15,200,000</td>
<td valign="top" align="center">1.7</td>
<td valign="top" align="center">1.40</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr04</td>
<td valign="top" align="center">13,374,100</td>
<td valign="top" align="center">12,500,001</td>
<td valign="top" align="center">13,900,000</td>
<td valign="top" align="center">1.4</td>
<td valign="top" align="center">1.55</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr05</td>
<td valign="top" align="center">17,994,100</td>
<td valign="top" align="center">16,700,001</td>
<td valign="top" align="center">18,600,000</td>
<td valign="top" align="center">1.9</td>
<td valign="top" align="center">1.24</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr06</td>
<td valign="top" align="center">20,311,600</td>
<td valign="top" align="center">19,900,001</td>
<td valign="top" align="center">20,900,000</td>
<td valign="top" align="center">1.0</td>
<td valign="top" align="center">1.70</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr07</td>
<td valign="top" align="center">17,351,700</td>
<td valign="top" align="center">17,300,001</td>
<td valign="top" align="center">18,000,000</td>
<td valign="top" align="center">0.7</td>
<td valign="top" align="center">1.29</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr08</td>
<td valign="top" align="center">13,037,700</td>
<td valign="top" align="center">11,800,001</td>
<td valign="top" align="center">14,000,000</td>
<td valign="top" align="center">2.2</td>
<td valign="top" align="center">1.28</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr09</td>
<td valign="top" align="center">12,381,000</td>
<td valign="top" align="center">11,600,001</td>
<td valign="top" align="center">13,300,000</td>
<td valign="top" align="center">1.7</td>
<td valign="top" align="center">1.34</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr10</td>
<td valign="top" align="center">16,802,400</td>
<td valign="top" align="center">16,200,001</td>
<td valign="top" align="center">17,700,000</td>
<td valign="top" align="center">1.5</td>
<td valign="top" align="center">1.48</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr11</td>
<td valign="top" align="center">11,513,900</td>
<td valign="top" align="center">11,000,001</td>
<td valign="top" align="center">11,900,000</td>
<td valign="top" align="center">0.9</td>
<td valign="top" align="center">1.39</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr12</td>
<td valign="top" align="center">20,460,700</td>
<td valign="top" align="center">19,500,001</td>
<td valign="top" align="center">22,000,000</td>
<td valign="top" align="center">2.5</td>
<td valign="top" align="center">3.22</td>
<td valign="top" align="center">st</td>
</tr>
<tr>
<td valign="top" align="left">Chr13</td>
<td valign="top" align="center">20,418,300</td>
<td valign="top" align="center">20,100,001</td>
<td valign="top" align="center">21,000,000</td>
<td valign="top" align="center">0.9</td>
<td valign="top" align="center">3.26</td>
<td valign="top" align="center">st</td>
</tr>
<tr>
<td valign="top" align="left">Chr14</td>
<td valign="top" align="center">11,055,000</td>
<td valign="top" align="center">10,500,001</td>
<td valign="top" align="center">11,600,000</td>
<td valign="top" align="center">1.1</td>
<td valign="top" align="center">1.30</td>
<td valign="top" align="center">m</td>
</tr>
<tr>
<td valign="top" align="left">Chr15</td>
<td valign="top" align="center">5,226,800</td>
<td valign="top" align="center">3,700,001</td>
<td valign="top" align="center">6,200,000</td>
<td valign="top" align="center">2.5</td>
<td valign="top" align="center">2.67</td>
<td valign="top" align="center">sm</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p><italic>Arm ratio (r, long arm/short arm): m = metacentric, r from 1 to 1.7; sm = submetacentric, r from 1.7 to 3; st = subtelocentric, r from 3 to 7.</italic></p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p>The high enrichment of LINE1 and <italic>Gypsy</italic> elements and their independent accumulation in the centromere of Chromosome 1. <bold>(A)</bold> A heat map view of protein-coding genes, TE (<italic>Copia</italic>, <italic>Gypsy</italic>, LINE1, PIF-Harbinger, and hAT), simple repeat, pseudogene, and GC content density in 100-Kb non-overlap sliding windows along chromosome 1 &#x201C;Chr01.&#x201D; The black triangle represents the predicted location of centromere. <bold>(B)</bold> The zoom in on the centromeric region. <bold>(C,D)</bold> The phylogenetic network of LINE1 and <italic>Gypsy</italic> elements, respectively. Each node in the network represents a single element. The links were defined as blast + alignment &#x201C;bitscore&#x201D; values (red, element in centromeric regions; blue, element in non-centromeric regions).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-766389-g004.tif"/>
</fig>
<p>Based on the density distribution of LINE1, we manually defined the borders of the putative centromeric region for each chromosome with a resolution of 100 kb (<xref ref-type="table" rid="T2">Table 2</xref>). The sizes of centromeres we identified ranged from 0.7 Mb to 2.5 Mb, summing up to 23 Mb (4.9% of the length of &#x201C;JGXP&#x201D; genome) (<xref ref-type="table" rid="T2">Table 2</xref>). We also classified the karyotype by calculating the arm ratio (<italic>r</italic>, long arm/short arm) for each chromosome as in the study of <xref ref-type="bibr" rid="B57">Levan et al. (1964)</xref>. The 15 chromosomes were classified into 11 m (metacentric, <italic>r</italic> from 1 to 1.7) terms, 2 sm (submetacentric, <italic>r</italic> from 1.7 to 3) terms, and 2 st (subtelocentric, <italic>r</italic> from 3 to 7) terms (<xref ref-type="table" rid="T2">Table 2</xref>). The karyotype of &#x201C;JGXP&#x201D; genome is thus 2n = 30 = 22m + 4sm + 4st.</p>
<p>We found a total of 3,312 (15.0%) LINE1, 6,592 (7.6%) <italic>Gypsy</italic>, 3,567 (5.6%) <italic>Copia</italic> retrotransposons, and 287 (1.3%) genes in the centromeric regions (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 12</xref>). Most of the genes in the centromeric regions are expressed (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 23</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Data File 2</xref>). The lengths of LINE1, <italic>Gypsy</italic>, and <italic>Copia</italic> elements in the centromeric regions were significantly longer than those in the non-centromeric regions (<italic>p</italic> &#x003C; 0.0001, Wilcoxon test) (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 24B</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 12</xref>). Additionally, we found 61 (34.9%) intact LINE1, 226 (8.1%) intact <italic>Gypsy</italic>, and 87 (2.3%) intact <italic>Copia</italic> retrotransposons in the centromeric regions (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 24C</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 12</xref>). For the intact <italic>Gypsy</italic> retrotransposons in the centromeric regions, 117 (51.8%) were CRM (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 24C</xref>).</p>
<p>The median insertion time of intact LINE1 and <italic>Gypsy</italic> elements in the centromeric regions was 0.67 MYA and 0.66 MYA, respectively; both were significantly younger than those in the non-centromeric regions (<italic>p</italic> &#x003C; 0.01, Wilcoxon test) (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 24A</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 13</xref>). However, the median insertion time of intact <italic>Copia</italic> elements in the centromeric regions was 1.36 MYA, which was significantly older than that in the non-centromeric regions (<italic>p</italic> &#x003C; 0.05, Wilcoxon test) (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 24A</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 13</xref>).</p>
<p>To examine whether the LINE1, <italic>Gypsy</italic>, and <italic>Copia</italic> in the centromeric regions were centromere-specific sequences, we constructed a sequence similarity-based phylogenetic network using these elements from the whole genome. The network showed that most of the LINE1 in the centromeric regions was clustered into one &#x201C;module&#x201D; (<xref ref-type="fig" rid="F4">Figure 4C</xref>), and the <italic>Gypsy</italic> in the centromeric regions was clustered into two &#x201C;modules&#x201D; (<xref ref-type="fig" rid="F4">Figure 4D</xref>), while the <italic>Copia</italic> in the centromeric regions could not be distinguished from those in non-centromeric regions (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 25</xref>). These results indicate that the centromeres of yellowhorn are dominated by centromere-specific retrotransposons of LINE1 and <italic>Gypsy</italic>.</p>
</sec>
<sec id="S2.SS5">
<title>Candidate Genes of Very-Long-Chain Fatty Acid Biosynthesis</title>
<p>In plants, VLCFA are important biological components of various lipids such as the triacylglycerols (TAGs), some sphingolipids and phospholipids, the cuticular waxes, and nervonic acid (<xref ref-type="bibr" rid="B45">Joub&#x00E8;s et al., 2008</xref>; <xref ref-type="bibr" rid="B92">Ruan et al., 2017</xref>; <xref ref-type="bibr" rid="B114">Xu et al., 2019</xref>). VLCFA biosynthesis pathways involve four successive reactions and the first reaction, which catalyzes the condensation by the 3-ketoacyl-CoA synthase (KCS) or elongation-defective-like (ELO-like) enzyme of a long chain acyl-CoA with a malonyl-CoA, is the synthesis rate-limiting step (<xref ref-type="bibr" rid="B39">Haslam and Kunst, 2013</xref>). Twenty-one KCS genes were identified in <italic>Arabidopsis thaliana</italic> and classified into eight phylogenetic subclasses: &#x03B1;, &#x03B2;, &#x03B3;, &#x03B4;, &#x03B6;, &#x03B5;, &#x03B7;, and &#x03B8; (<xref ref-type="bibr" rid="B19">Costaglioli et al., 2005</xref>; <xref ref-type="bibr" rid="B45">Joub&#x00E8;s et al., 2008</xref>).</p>
<p>Sequence similarity-based functional annotation identified 38 candidate genes in VLCFA biosynthesis in yellowhorn, of which 18 were KCS genes and two ELO-like genes (<xref ref-type="fig" rid="F5">Figures 5A,B</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Data File 3</xref>). Phylogenetic analysis divided the 18 KCS genes into seven subclasses with the absence of the &#x03B2; subclass: 2 &#x03B1; genes, 1 &#x03B3; gene, 1 &#x03B4; gene, 5 &#x03B6; genes, 2 &#x03B5; genes, 3 &#x03B7; genes, and 4 &#x03B8; genes (<xref ref-type="fig" rid="F5">Figure 5B</xref>). Overall, the domain structure is highly conserved among the subclasses of KCS gene, with 10 of the KCS genes displaying no intron structure (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 26</xref>).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption><p>The very-long-chain fatty acid biosynthesis pathway and the classification and expression profile of the candidate KCS genes. <bold>(A)</bold> The candidate genes-encoding key enzymes in the four reactions of the very long-chain fatty acid synthesis pathway in yellowhorn genome. <bold>(B)</bold> Phylogenetic tree of the candidate KCS genes in <italic>Arabidopsis</italic> and yellowhorn. <bold>(C)</bold> The candidate KCS genes expression profile. The expression values were normalized by ln (TPM + 1).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-12-766389-g005.tif"/>
</fig>
<p>In <italic>Arabidopsis</italic>, the six KCS genes in the &#x03B1; (<italic>KCS4</italic>, <italic>KCS9</italic>, and <italic>KCS17</italic>) and &#x03B2; (<italic>KCS8</italic>, <italic>KCS16</italic>, and <italic>KCS18</italic>) subclasses are closely related to the seed-specific condensing enzyme that plays a role in seed oil production, whereas the other 15 genes have been implicated in the synthesis of wax components (<xref ref-type="bibr" rid="B19">Costaglioli et al., 2005</xref>; <xref ref-type="bibr" rid="B45">Joub&#x00E8;s et al., 2008</xref>). The two KCS candidate genes of the &#x03B1; subclass in yellowhorn, <italic>XS02G0044900.1</italic> and <italic>XS07G0040000.1</italic>, were most similar to <italic>KCS4</italic>, indicating that they may be involved in catalyzing a condensing reaction of VLCFA biosynthesis (<xref ref-type="fig" rid="F5">Figure 5B</xref>). These two genes in yellowhorn showed differential patterns of expression in flower bud, inflorescence, flower, fruit, and leaf tissues (<xref ref-type="fig" rid="F5">Figure 5C</xref>).</p>
</sec>
<sec id="S2.SS6">
<title>Long Terminal Repeat Retrotransposons Evolution</title>
<p>Long terminal repeat retrotransposons are the highest portion of TEs, representing &#x223C;30% of yellowhorn genome (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 7A-B</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 9</xref>). To investigate the mode and evolution of the expansion of LTR-RTs in yellowhorn, we identified the intact LTR-RTs, solo-LTRs (the LTRs without <italic>Gag-Pol</italic>), and truncated LTR-RTs in the &#x201C;JGXP&#x201D; genome and 16 other plant genomes (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 14</xref>). A total of 6,749 intact LTR-RTs (<italic>I</italic>) were identified in yellowhorn (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 15</xref>), much more than in the other genomes, indicating that intact LTR-RTs (<italic>I</italic>) are maintained at a higher frequency in yellowhorn (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 26A</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 15</xref>). To estimate LTR-RT birth and removal rates, we compared the numbers of solo-LTRs (<italic>S</italic>) and truncated LTR-RTs (<italic>T</italic>). The truncated LTR-RTs (33,692) were far more prevalent than solo-LTRs (10,771) in the yellowhorn genome (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 15</xref>). The <italic>I</italic> + <italic>S</italic> + <italic>T</italic> values of yellowhorn were moderate compared with the other 16 plants (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 27C</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 15</xref>), which can represent the birth rate of LTR-RTs (<xref ref-type="bibr" rid="B71">Lyu et al., 2018</xref>). 
Since the fragmental scaffolds of the genome affect the identification of the three classes of LTRs, we corrected the counting by filtering out short scaffolds and then calculated the ratios of filtered <italic>S:I</italic>, <italic>T:I</italic>, and <italic>(S</italic> + <italic>T):I</italic>, which were 1.53, 4.98, and 5.50, respectively. These ratios are relatively low compared with the other 16 plant genomes (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 15</xref>). We further analyzed the trends of <italic>S</italic>:<italic>I</italic> among clusters of LTR-RT sequences by their similarity. Cluster-level <italic>S</italic>:<italic>I</italic> values reflect the removal rate for a specific family. We considered groups with filtered <italic>S</italic>:<italic>I</italic> &#x003E; 3 to have a high death rate as defined by a previous study (<xref ref-type="bibr" rid="B71">Lyu et al., 2018</xref>). We found 18.66% of the sequence families have high removal rates, which is a low proportion compared with the other 16 plant genomes (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 27E</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Table 15</xref>). Thus, the high proportion of LTR-RTs in yellowhorn is maintained by a moderate birth rate and a low removal rate.</p>
</sec>
<sec id="S2.SS7">
<title>Phylogenetic Inference and Gene Family Evolution</title>
<p>We constructed a phylogenetic tree using a concatenated sequence alignment of 201 single-copy orthologous genes among the yellowhorn genome and 16 other plant genomes. In the phylogenetic tree, yellowhorn and <italic>Dimocarpus longan</italic> were clustered into a group of the Sapindaceae family with an estimated divergence time of &#x223C;53 MYA (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 28A</xref>). Our analysis supports the grouping of <italic>Populus trichocarpa</italic> with malvids rather than fabids and the grouping of myrtales as a sister taxon to the eurosids rather than a taxon in malvids, in agreement with recently published whole-genome studies (<xref ref-type="bibr" rid="B82">Myburg et al., 2014</xref>; <xref ref-type="bibr" rid="B116">Yang et al., 2017</xref>).</p>
<p>Expanded gene families (EGF), regardless of duplication type, provide the raw material for adaptation and trait evolution. We compared 569,169 protein-coding genes from the 17 plant species, yielding a total of 33,631 gene families that comprised 449,645 genes. A total of 181,959 genes belonging to 5,873 gene families were shared among all 17 plant genomes. We found that 282 gene families comprising 830 genes were unique for yellowhorn genome. A total of 964 orthologous groups with 3,107 genes were EGF in the yellowhorn lineage since divergence from <italic>D. longan</italic> (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 28A</xref>). EGF genes were significantly enriched (<italic>FDR</italic> &#x003C; 0.001) in a number of gene ontologies (GO) of the flavonoid metabolic process (GO:0009812) and the flavonoid biosynthetic process (GO:0009813), and more specifically in quercetin 3-O-glucosyltransferase activity (GO:0080043), UDP-glucosyltransferase activity (GO:0035251), and flavonoid glucuronidation (GO:0052696) (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 29</xref>).</p>
</sec>
<sec id="S2.SS8">
<title>The Gamma Hexaploidy Event</title>
<p>The collinear blocks within yellowhorn provided evidence for the gamma (&#x03B3;) hexaploidy event that remained visible in chromosomes 2, 7, and 8 (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 28B</xref>). The distributions of <italic>Ks</italic> (synonymous substitution rate) in yellowhorn and <italic>Vitis vinifera</italic> (grape) were similar; both showed peaks of <italic>Ks</italic> at around 1.4&#x2013;1.6 (117 MYA-132 MYA), which further supports that yellowhorn and grape shared the &#x03B3; event (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 28C</xref>). The dotplot of collinear blocks within the yellowhorn genome and the 1:1 collinear pattern between yellowhorn and grape indicated that the genome has not undergone a whole-genome duplication (WGD) event since its divergence from grape (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 28B</xref>, <xref ref-type="supplementary-material" rid="DS1">27C</xref>).</p>
</sec>
</sec>
<sec id="S3" sec-type="discussion">
<title>Discussion</title>
<p>This study presents a high-quality chromosome-scale genome assembly and extensive comparative analyses on genome diversity and centromere evolution for a valuable oil-producing tree species yellowhorn. Our investigation provided insights into centromere structure, sequence composition, and evolutionary dynamics that contribute to our understanding of centromere biology.</p>
<p>By providing an additional reference genome for yellowhorn, we were able to compare genome variations among three cultivars. The three cultivar genomes are largely syntenic (&#x223C;51%), but genome-specific regions were also substantial, making up to 26&#x2013;27% of the genome between cultivar comparisons. Structural rearrangements were detected among the cultivars with inversions, accounting for 11&#x2013;12%. These suggest that there is substantial genomic variation in the species, and that one specific cultivar was insufficient to capture the entire genome property of yellowhorn. Large-scale re-sequencing study could provide a better understanding about the degree of diversity in different categories/families of sequences, and thus guide effective breeding efforts.</p>
<p>Centromeric tandem repeats are the dominant sequences of centromeres in most species, while, in some species, retrotransposons of <italic>Gypsy</italic> elements are also reported (<xref ref-type="bibr" rid="B20">Csink and Henikoff, 1998</xref>; <xref ref-type="bibr" rid="B83">Neumann et al., 2011</xref>). Centromeres are one of the difficult and mysterious parts of many high-quality genomes. They are comprised of highly repetitive elements and can vary dramatically even among closely related species (<xref ref-type="bibr" rid="B118">Yang et al., 2021</xref>). There were no readily apparent conserved characteristics for the candidate centromere tandem repeats from &#x223C;300 animals and plants (<xref ref-type="bibr" rid="B77">Melters et al., 2013</xref>). Interestingly, the typical tandem centromeric repeats were not found in yellowhorn; instead, we discovered centromere-specific retrotransposons of LINE1 and <italic>Gypsy</italic>, which dominate the centromeres. The size of centromeric regions varies among chromosomes, ranging from 0.4 Mb to 1.4 Mb in <italic>A. thaliana</italic> and from 0.065 Mb to 2 Mb in <italic>O. sativa</italic> (<xref ref-type="bibr" rid="B91">Round et al., 1997</xref>; <xref ref-type="bibr" rid="B18">Copenhaver et al., 1999</xref>; <xref ref-type="bibr" rid="B16">Cheng et al., 2002</xref>). In yellowhorn, centromere size on each chromosome ranged from 0.7 Mb to 2.5 Mb; the total size of centromeres was 23 Mb, of which 4.2 Mb was LINE1 elements and 6.2 Mb <italic>Gypsy</italic> elements. To date, centromere-specific LINE (named <italic>Nanica</italic>) elements are found only in <italic>M. acuminata</italic>, but the origin and evolutionary dynamic of such centromeric LINEs are unclear (<xref ref-type="bibr" rid="B22">D&#x2019;Hont et al., 2012</xref>; <xref ref-type="bibr" rid="B17">&#x010C;&#x00ED;&#x017E;kov&#x00E1; et al., 2013</xref>; <xref ref-type="bibr" rid="B4">Belser et al., 2021</xref>). 
Our analysis suggests that the insertion time of centromeric LINE1 and <italic>Gypsy</italic> elements (0.67 MYA and 0.66 MYA) was significantly younger than those in non-centromeric regions. This indicates that the centromeres are going through rapid evolution in yellowhorn. A previous study showed that some centromeres adopt new positions over evolutionary time subsequent to a speciation event by comparing the closely related species human and macaque (<xref ref-type="bibr" rid="B76">McKinley and Cheeseman, 2016</xref>). The recent enrichment of LINE1 and <italic>Gypsy</italic> elements and the lack of typical tandem centromeric repeats indicate that we identified a case of recently evolved centromeres in yellowhorn. Our finding of the enrichment of centromere-specific retrotransposons deserves further verification of centromeric localization by analyses such as the ChIP-seq with an antibody against the fast-evolving CENH3 (Centromere Specific Histone 3) protein.</p>
<p>The seed oil of yellowhorn contains 1.5&#x2013;3.0% nervonic acid (<xref ref-type="bibr" rid="B92">Ruan et al., 2017</xref>), which has great potential for the production of nervonic acid. We identified the biosynthetic pathway of VLCFA in yellowhorn and revealed associated gene expression patterns. KCS enzymes catalyze the synthesis of several VLCFA, including nervonic acid (<xref ref-type="bibr" rid="B79">Millar and Kunst, 1997</xref>; <xref ref-type="bibr" rid="B37">Guo et al., 2009</xref>; <xref ref-type="bibr" rid="B35">Gonz&#x00E1;lez-Mellado et al., 2019</xref>). We assayed the expression of the yellowhorn KCS genes by comparing different tissues at various developmental stages using RNA-Seq. Most KCS genes were highly expressed in flowers and inflorescences, two genes were moderately expressed in leaves, while almost all genes were lowly expressed in fruits. This result directs us to a hypothesis that the VLCFA in the seeds of yellowhorn may be synthesized and accumulated during flowering, or it is synthesized in leaves and then transported to seeds for storage. Our results are important for further investigation and manipulation of nervonic acid synthesis in plants.</p>
<p>In conclusion, the characterization of the reference genome sequence of yellowhorn presented here provides a key resource for further development of hypotheses in plant centromere evolution and functioning, and advancement of plant biotechnology in yellowhorn improvement and breeding, such as molecular marker-assisted selection and genome editing.</p>
</sec>
<sec id="S4" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="S4.SS1">
<title>Plant Material and Sequencing</title>
<p>The sequenced individual, &#x201C;Jinguanxiapei&#x201D; (abbreviated with &#x201C;JGXP&#x201D;), was collected from a natural yellowhorn stand in Chengde, Hebei Province, China. DNA was extracted from young leaves of this variety in the early spring using a cetyl trimethyl ammonium bromide (CTAB)-based method (<xref ref-type="bibr" rid="B23">Doyle and Doyle, 1987</xref>).</p>
<p>Three approaches were employed in DNA sequencing. First, 2 &#x00D7; 150 paired-end libraries were sequenced on the Illumina HiSeq X Ten platform. Second, SMRT libraries were constructed using PacBio<sup>&#x00AE;</sup> SMRTbell&#x2122; Template Prep Kit 4.0 V2, following the PacBio 20-Kb protocol<sup><xref ref-type="fn" rid="footnote1">1</xref></sup> and sequenced on PacBio RS II and PacBio SEQUEL. Third, a Hi-C library was prepared following a published protocol (<xref ref-type="bibr" rid="B109">Wang et al., 2015</xref>) and sequenced on Illumina HiSeq 2500.</p>
</sec>
<sec id="S4.SS2">
<title>Estimating Genome Size, Heterozygosity, and Repeat Content</title>
<p>The 17-bp <italic>K</italic>-mers were counted using Jellyfish v1.1.11 (<xref ref-type="bibr" rid="B74">Marcais and Kingsford, 2011</xref>) with default parameters using corrected PacBio reads. The genome size, the level of heterozygosity, and repeat content were estimated using gce v1.0.0 (<xref ref-type="bibr" rid="B65">Liu et al., 2013</xref>) using PacBio reads. We also estimated the heterozygosity by mapping Illumina paired-end reads using bowtie 2 (<xref ref-type="bibr" rid="B55">Langmead and Salzberg, 2012</xref>) to the assembled genome and calling the heterozygous variant locus using samtools/bcftools pipeline (<xref ref-type="bibr" rid="B60">Li et al., 2009</xref>).</p>
</sec>
<sec id="S4.SS3">
<title><italic>De novo</italic> Genome Assembly</title>
<p>The <italic>de novo</italic> assembly was prepared as follows in a progressive manner. The primary version v0.1 was assembled by SMART <italic>de novo</italic> v1.0.0 (<xref ref-type="bibr" rid="B66">Liu et al., 2021</xref>) after correction with Canu v1.6. The contigs of assembly v0.1 were polished using arrow v2.2.1 with PacBio long reads, which were further used for scaffolding using SSPACE-LongRead v1.1 (<xref ref-type="bibr" rid="B9">Boetzer and Pirovano, 2014</xref>) and SSPACE-standard v3.0 (<xref ref-type="bibr" rid="B8">Boetzer et al., 2011</xref>) and using GapCloser v1.12 (<xref ref-type="bibr" rid="B70">Luo et al., 2012</xref>) with Illumina paired-end reads. After one round of polishing by arrow v2.2.1 and three rounds of polishing by pilon v1.22 (<xref ref-type="bibr" rid="B108">Walker et al., 2014</xref>), we generated assembly v1.2. We mapped the Hi-C reads to the assembly v1.2 using Juicer v1.5.6 (<xref ref-type="bibr" rid="B25">Durand et al., 2016</xref>) to correct the mis-joined scaffolds using the 3D-DNA pipeline (version 170123) (<xref ref-type="bibr" rid="B24">Dudchenko et al., 2017</xref>) with Hi-C reads. Afterward, we generated assembly v2.2 after three rounds of polishing using arrow v2.2.1 and three rounds of polishing using pilon v1.22.</p>
<p>We failed to assemble the complete chloroplast (Pt) and mitochondrial (Mt) genomes in the v2.2 assembly. The PacBio long reads of Pt and Mt were enriched by sequence similarity search against 11 Pt genomes of Sapindaceae and 24 Mt genomes of Malvidae available in the NCBI database<sup><xref ref-type="fn" rid="footnote2">2</xref></sup>, and then the two genomes were assembled <italic>de novo</italic> using Canu v1.6.</p>
<p>After merging the assembly v2.2, Pt genome, and Mt genome, we removed redundant sequences with Redundans v0.13c (<xref ref-type="bibr" rid="B89">Pryszcz and Gabald&#x00F3;n, 2016</xref>), and then generated the final assembly of the &#x201C;JGXP&#x201D; genome.</p>
</sec>
<sec id="S4.SS4">
<title>Assessment of Genome Completeness</title>
<p>Genome completeness was assessed using the plant data set of BUSCO (Benchmarking Universal Single-Copy Orthologs) (<xref ref-type="bibr" rid="B95">Simao et al., 2015</xref>), LTR Assembly Index (LAI) (<xref ref-type="bibr" rid="B87">Ou et al., 2018</xref>), and the mapping rate, including PacBio long reads, Illumina paired-end reads, and the transcriptome assembled in the current study.</p>
</sec>
<sec id="S4.SS5">
<title>Transcriptome Assembly</title>
<p>To construct a comprehensive yellowhorn transcriptome, three methods, including <italic>de novo</italic> and reference genome-guided assembly using Trinity v2.0.6 (<xref ref-type="bibr" rid="B36">Grabherr et al., 2011</xref>), and reference genome-guided assembly using StringTie v1.3.5 (<xref ref-type="bibr" rid="B88">Pertea et al., 2015</xref>) and HiSat2 v2.1.0 (<xref ref-type="bibr" rid="B50">Kim et al., 2015</xref>), were performed using 75 Illumina paired-end samples in the current study (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 16</xref>). These three sets of transcriptomes were merged and further refined using CD-HIT v4.6 (<xref ref-type="bibr" rid="B33">Fu et al., 2012</xref>) with 95% identity and 95% coverage.</p>
</sec>
<sec id="S4.SS6">
<title>Gene Prediction and Functional Annotation</title>
<p>Three approaches, including transcript-based prediction, protein homology-based prediction, and <italic>ab initio</italic> prediction, were employed to predict the protein-coding genes using the repeat-masked version of the genome. Protein sequences of <italic>Arabidopsis thaliana</italic> (<xref ref-type="bibr" rid="B103">Swarbreck et al., 2007</xref>), <italic>Olea europaea</italic> (<xref ref-type="bibr" rid="B30">Fernando et al., 2016</xref>), <italic>Dimocarpus longan</italic> (<xref ref-type="bibr" rid="B64">Lin et al., 2017</xref>), and <italic>Citrus grandis</italic> (<xref ref-type="bibr" rid="B112">Wang X. et al., 2017</xref>) were merged and further refined using CD-HIT v4.6 (<xref ref-type="bibr" rid="B33">Fu et al., 2012</xref>) with 95% identity and 95% coverage. The transcriptome and protein sequences were each aligned with the repeat-masked genome using BLAST, and the alignments were further optimized using Exonerate v2.4.0 (<xref ref-type="bibr" rid="B97">Slater and Birney, 2005</xref>). Single-copy genes identified by BUSCO (<xref ref-type="bibr" rid="B95">Simao et al., 2015</xref>) were trained and further used for <italic>ab initio</italic> gene prediction using AUGUSTUS v3.2.3 (<xref ref-type="bibr" rid="B100">Stanke et al., 2008</xref>; <xref ref-type="bibr" rid="B49">Keller et al., 2011</xref>). The transcripts, proteins, and <italic>ab initio</italic> predictions were combined as evidence hints for the input of the MAKER v2.31.9 (<xref ref-type="bibr" rid="B12">Cantarel et al., 2008</xref>) annotation pipeline for final gene model prediction. The completeness of gene annotation was assessed using BUSCO.</p>
<p>The predicted protein-coding genes were functionally annotated using two approaches: (1) the sequence similarity searching method by five functional databases: the NR (NCBI&#x2019;s non-redundant protein) database, the Swiss-Prot protein database, the TrEMBL database, the Pfam database, and the eggNOG database (<xref ref-type="bibr" rid="B43">Jensen et al., 2007</xref>), and (2) the domain architecture similarity searching method by InterProScan v5.27-66.0 (<xref ref-type="bibr" rid="B44">Jones et al., 2014</xref>). In addition, transcription factors, transcriptional regulators, and chromatin regulators were annotated using PlanTFcat (<xref ref-type="bibr" rid="B21">Dai et al., 2013</xref>).</p>
<p>Pseudogenes were identified using Pseudopipe (<xref ref-type="bibr" rid="B122">Zhang et al., 2006</xref>) with default parameters. The tRNA genes and rRNA genes were predicted using tRNAScan-SE v1.3.1 (<xref ref-type="bibr" rid="B68">Lowe and Eddy, 1997</xref>) and RNAMMER v1.2 (<xref ref-type="bibr" rid="B53">Lagesen et al., 2007</xref>), respectively. The small non-coding RNA genes were subjected to similarity searches against the Rfam (11) database using rfam_scan.pl (<xref ref-type="bibr" rid="B11">Burge et al., 2012</xref>). We used GeSeq (<xref ref-type="bibr" rid="B104">Tillich et al., 2017</xref>) to predict the protein-coding genes, rRNA genes, and tRNA genes of Pt genome and Mt genome, respectively.</p>
</sec>
<sec id="S4.SS7">
<title>Expression Quantification</title>
<p>Before mapping the reads to the genome, all reads were filtered for adapter contamination, ambiguous residues (N&#x2019;s), low-quality regions lower than 30, and reads shorter than 60 bp using cutadapt (<xref ref-type="bibr" rid="B75">Martin, 2011</xref>). The clean reads were mapped to the genome using HiSat2 v2.1.0 (<xref ref-type="bibr" rid="B50">Kim et al., 2015</xref>) with the parameter &#x201C;-k 1.&#x201D; We calculated the TPM values of genes using StringTie v1.3.5 (<xref ref-type="bibr" rid="B88">Pertea et al., 2015</xref>).</p>
</sec>
<sec id="S4.SS8">
<title>Genome Comparison</title>
<p>We performed the pairwise alignment among the yellowhorn genome in the current study and the two previously published assembled genomes &#x201C;ZS4&#x201D; (Bioproject accession: <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA483857">PRJNA483857</ext-link>) (<xref ref-type="bibr" rid="B7">Bi et al., 2019</xref>) and &#x201C;WF18&#x201D; (Bioproject accession: <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA496350">PRJNA496350</ext-link>) (<xref ref-type="bibr" rid="B62">Liang et al., 2019</xref>) using minimap2 (<xref ref-type="bibr" rid="B59">Li, 2018</xref>). The syntenic regions, structural rearrangements (inversions, translocations, and duplications), and the sequence differences (SNPs, indels, and so on) of the pairwise comparison for the three genomes were identified using SyRI v1.3 (<xref ref-type="bibr" rid="B34">Goel et al., 2019</xref>). The pairwise homolog chromosomes among the three genomes were determined by the shared synteny blocks based on the dotplots of the pairwise alignments.</p>
</sec>
<sec id="S4.SS9">
<title>Gene Family Clustering Among Three Cultivars of Yellowhorn</title>
<p>The core and the dispensable gene sets were summarized based on gene family clustering with protein sequences of the three cultivars using OrthoFinder v2.5.2 (<xref ref-type="bibr" rid="B28">Emms and Kelly, 2019</xref>) with default parameters. The BLASTP with <italic>E</italic>-value of 1<italic>E</italic>-10 implemented in diamond v0.9.9.110 (<xref ref-type="bibr" rid="B10">Buchfink et al., 2021</xref>) was performed for homologous searching. The gene families present in all three and two cultivars were defined as core gene families and dispensable gene families, respectively. Those that only existed in one accession were defined as private gene families.</p>
</sec>
<sec id="S4.SS10">
<title>Centromere Identification</title>
<p>As tandem repeats are typical components of centromeric chromosome regions, we first followed the approach of Melters et al. (<xref ref-type="bibr" rid="B77">Melters et al., 2013</xref>) to identify the centromeric regions using PacBio long reads. After masking the low complexity of the long reads using DUST implemented in MEME suite v4.11.3 (<xref ref-type="bibr" rid="B2">Bailey et al., 2009</xref>), tandem repeats were detected using TRF v4.09 (<xref ref-type="bibr" rid="B6">Benson, 1999</xref>). Tandem repeats with &#x003E; 90% identity were clustered, and the repeats in the top clusters were presumed to be the candidate centromeric repeats. However, we did not find centromeric tandem repeats in the yellowhorn genome (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 8</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Data File 1</xref>).</p>
<p>Centromeres are tethered to the spindle pole body, leading to centromere clustering (<xref ref-type="bibr" rid="B29">Feng et al., 2014</xref>; <xref ref-type="bibr" rid="B81">Mizuguchi et al., 2014</xref>). The spatial proximity reflected by the Hi-C interaction intensity decreased along with the increasing of physical distance between two loci (<xref ref-type="bibr" rid="B63">Lieberman-Aiden et al., 2009</xref>). Thus, we also performed Centurion (<xref ref-type="bibr" rid="B106">Varoquaux et al., 2015</xref>) to identify the location of centromeres using a genome-wide Hi-C contact map. Centurion was performed to call centromere locations in the yellowhorn genome using the Hi-C sequencing data generated in the current study. The centromere location for each chromosome predicted by Centurion was presented as a genomic point of one base pair.</p>
<p>We noted that LINE1 retrotransposons were accumulated preferentially in narrow regions (<xref ref-type="fig" rid="F2">Figure 2</xref>), and these regions highly match the centromeres predicted by Centurion (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 2B</xref>). The density of <italic>Gypsy</italic> retrotransposons and GC content was high, while the density of genes was low in these regions (<xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F4">4A,B</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figures 9&#x2013;22</xref>). Based on these observations, we manually defined the start and the end of the centromeric regions at a resolution of 100 kb. We calculated the arm ratio for each chromosome, long arm/short arm, to classify the karyotype according to a previous study (<xref ref-type="bibr" rid="B57">Levan et al., 1964</xref>).</p>
</sec>
<sec id="S4.SS11">
<title>Phylogenetic Network of Transposable Elements</title>
<p>To generate weighted links, the sequences of LINE1, <italic>Copia</italic>, and <italic>Gypsy</italic> elements were pairwise aligned using BLASTN v2.2.31 (&#x201C;-strand plus -dust no -max_target_seqs 4000&#x201D;). The link weights were defined as alignment &#x201C;bitscores.&#x201D; We did not set a threshold to remove links to avoid disconnecting whole modules of ancient sequences from the network (<xref ref-type="bibr" rid="B58">Levy et al., 2017</xref>). For efficiency and improved perception, we disconnected the weakest links of each node for the <italic>Gypsy</italic> and <italic>Copia</italic> networks, and retained the strongest 3% and 10% of links, respectively. We displayed all of the links for the LINE1 network. The network was visualized with Cytoscape (<xref ref-type="bibr" rid="B94">Shannon et al., 2003</xref>).</p>
</sec>
<sec id="S4.SS12">
<title>Insertion Dating of Long Interspersed Nuclear Elements 1 and Long Terminal Repeat Retrotransposons</title>
<p>Each LINE1 retrotransposon and its best BLAST hit (<xref ref-type="bibr" rid="B115">Yang and Bennetzen, 2009</xref>), as well as the 5&#x2019;-LTR and 3&#x2019;-LTR of each LTR-RT, were aligned using MAFFT v7.221 (<xref ref-type="bibr" rid="B48">Katoh and Standley, 2013</xref>), and the corresponding divergence <italic>K</italic> was estimated using the Kimura Two-Parameter model (<xref ref-type="bibr" rid="B51">Kimura, 1980</xref>). The insertion time was calculated by the formula: <italic>T</italic> = <italic>K</italic>/(2 &#x00D7; <italic>r</italic>), where <italic>r</italic> refers to a substitution rate of 1.3 &#x00D7; 10<sup>&#x2013;8</sup> per site per year (<xref ref-type="bibr" rid="B72">Ma et al., 2004</xref>).</p>
</sec>
<sec id="S4.SS13">
<title>Intact Long Interspersed Nuclear Elements 1 Elements</title>
<p>We performed getorf (&#x201C;-find 1 -minsize 800&#x201D;) implemented in EMBOSS v6.5.7.0 (<xref ref-type="bibr" rid="B90">Rice et al., 2000</xref>) to identify the ORFs (open reading frames) of the LINE1 elements extended with 1-Kb flanking regions. The identified ORFs were annotated using hmmscan v3.2 (<xref ref-type="bibr" rid="B80">Mistry et al., 2013</xref>) with Pfam31 (<xref ref-type="bibr" rid="B32">Finn et al., 2009</xref>). The intact LINE1 elements were screened as described in a previous study (<xref ref-type="bibr" rid="B42">Ivancevic et al., 2016</xref>).</p>
</sec>
<sec id="S4.SS14">
<title>Candidate Genes of the Very Long-Chain Fatty Acids Biosynthesis Pathway</title>
<p>Protein-coding genes were annotated with enzyme function classes using E2P2 (Ensemble Enzyme Prediction Pipeline) v3.1 (<xref ref-type="bibr" rid="B14">Chae et al., 2014</xref>) and then assigned to PLANTCYC v13.0<sup><xref ref-type="fn" rid="footnote3">3</xref></sup> using Pathway Tools v22.5 (<xref ref-type="bibr" rid="B47">Karp et al., 2015</xref>) for the prediction of genes involved in the VLCFA biosynthesis pathway. The KCS genes were annotated using CDD (conserved domain database) (<xref ref-type="bibr" rid="B69">Lu et al., 2020</xref>) and SMART (simple modular architecture research tool) (<xref ref-type="bibr" rid="B56">Letunic et al., 2020</xref>). To construct the maximum likelihood tree of KCS genes, including those of yellowhorn and <italic>A. thaliana</italic>, IQ-TREE v1.6.7 (<xref ref-type="bibr" rid="B85">Nguyen et al., 2015</xref>) was performed with the optimal amino acid substitution model of LG + I + G4 with 1,000 ultrafast bootstrapping. The visualization was displayed using TBtools v1.068 (<xref ref-type="bibr" rid="B15">Chen et al., 2020</xref>).</p>
</sec>
<sec id="S4.SS15">
<title>Repetitive Element Identification and Long Terminal Repeat Retrotransposons Evolution</title>
<p>The <italic>de novo</italic> repeat identification approach was employed to annotate the repeat elements. First, RepeatModeler v1.0.10 (<xref ref-type="bibr" rid="B98">Smit and Hubley, 2008</xref>) was performed to train a repeat database by BLAST approach, and then RepeatMasker v4.07 (<xref ref-type="bibr" rid="B99">Smit et al., 2013</xref>) was used to annotate the repeat elements based on the database above.</p>
<p>To accurately identify the LTR-RTs, LTRharvest v1.5.10 (<xref ref-type="bibr" rid="B27">Ellinghaus et al., 2008</xref>) and LTRdigest v1.5.10 (<xref ref-type="bibr" rid="B101">Steinbiss et al., 2009</xref>) were used to <italic>de novo</italic> identify the candidate intact LTR-RTs with a pair of flanking LTRs ranged from 100 bp to 3,000 bp with similarity &#x003E; 80%. The domain-based annotation method implemented in Profrep<sup><xref ref-type="fn" rid="footnote4">4</xref></sup> was performed to annotate the internal sequences of candidate LTR-RTs using the REXdb v3.0 database (<xref ref-type="bibr" rid="B84">Neumann et al., 2019</xref>). An LTR-RT with complete <italic>Gag-Pol</italic> protein sequence was retained as an intact LTR-RT (<italic>I</italic>). If one side of the flanking sequences covered at least 50% of any <italic>Gag-Pol</italic> sequences with <italic>E</italic>-value &#x003C; 1<italic>E</italic>-8 and identity &#x003E; 30%, the corresponding LTR homologies were classified as truncated LTR-RTs (<italic>T</italic>). The LTRs without <italic>Gag-Pol</italic> were considered as solo-LTRs (<italic>S</italic>). SiLiX v1.2.9 (<xref ref-type="bibr" rid="B78">Miele et al., 2011</xref>) was performed to cluster the LTRs with the coverage of 70% and the identity of 60%.</p>
</sec>
<sec id="S4.SS16">
<title>Phylogenetic and Gene Family Analysis</title>
<p>OrthoMCL v2.0.9 (<xref ref-type="bibr" rid="B61">Li et al., 2003</xref>) was used to identify gene families with the protein-coding genes of yellowhorn and 16 other plant species (<xref ref-type="supplementary-material" rid="DS1">Supplementary Table 14</xref>). A total of 201 single-copy gene families were identified and used for phylogenetic tree reconstruction. Each single-copy gene family was aligned using MUSCLE v3.8.425 (<xref ref-type="bibr" rid="B26">Edgar, 2004</xref>) with default parameters. The alignments of each gene family were concatenated into a single alignment. This alignment was trimmed using trimAl v1.4.rev15 (<xref ref-type="bibr" rid="B13">Capella-Gutierrez et al., 2009</xref>). The trimmed alignment was used for the maximum likelihood phylogenetic tree reconstruction using IQ-TREE v1.6.7 (<xref ref-type="bibr" rid="B85">Nguyen et al., 2015</xref>), with the best-fit model JTT + F + R5 selected by ModelFinder (<xref ref-type="bibr" rid="B46">Kalyaanamoorthy et al., 2017</xref>) and with the 1,000 replications of ultrafast bootstrap and Shimodaira-Hasegawa-like approximate likelihood-ratio (SH-aLRT) test.</p>
<p>The MCMCTree in PAML v4.9h (<xref ref-type="bibr" rid="B117">Yang, 2007</xref>) was run to estimate the divergence time. The divergence time between <italic>O. sativa</italic> and Pentapetalae (other 16 species), representing the monocot-dicot divergence, was fixed at 130 to 135 MYA in the present study (<xref ref-type="bibr" rid="B73">Magall&#x00F3;n et al., 2015</xref>). The divergence of Rosids from other Pentapetalae species was at least 99.6 MYA (<xref ref-type="bibr" rid="B3">Basinger and Dilcher, 1984</xref>; <xref ref-type="bibr" rid="B73">Magall&#x00F3;n et al., 2015</xref>), and the divergence of <italic>C. grandis</italic> from other Sapindales species was at least 65.5 MYA (<xref ref-type="bibr" rid="B73">Magall&#x00F3;n et al., 2015</xref>).</p>
<p>Expansion and contraction of the families were determined using CAFE v4.2 (<xref ref-type="bibr" rid="B38">Han et al., 2013</xref>) with default parameters. Enrichment of gene ontology (GO) terms was summarized using clusterProfiler v3.8.1 (<xref ref-type="bibr" rid="B120">Yu et al., 2012</xref>). We controlled the false discovery rate (<italic>FDR</italic>) of the <italic>P</italic> values using Benjamini-Hochberg procedure (<xref ref-type="bibr" rid="B5">Benjamini and Hochberg, 1995</xref>).</p>
</sec>
<sec id="S4.SS17">
<title>Analysis of Genome Duplication Event</title>
<p>Syntenic blocks containing at least five genes were identified using MCscanX (<xref ref-type="bibr" rid="B113">Wang et al., 2012</xref>) with default parameters. KaKsCalculator v2.0 (<xref ref-type="bibr" rid="B110">Wang et al., 2010</xref>) was used to calculate <italic>Ks</italic> with the YN model. Only the gene pairs with <italic>Ks</italic> &#x2264; 3 were retained for the downstream analysis.</p>
</sec>
<sec id="S4.SS18">
<title>Visualization</title>
<p>Visualization of the predicted distribution of yellowhorn based on sampled records and current climate data (<xref ref-type="bibr" rid="B111">Wang Q. et al., 2017</xref>) was conducted in ArcGIS v9.2. The screens of zoom in on the centromeric regions were generated using JBrowse implemented in PlantGenIE (<xref ref-type="bibr" rid="B96">Skinner et al., 2009</xref>; <xref ref-type="bibr" rid="B102">Sundell et al., 2015</xref>).</p>
</sec>
</sec>
<sec id="S5" sec-type="data-availability">
<title>Data Availability Statement</title>
<p>The raw sequence data have been deposited in the Short Read Archive under NCBI BioProject ID <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA694500">PRJNA694500</ext-link>. The Whole Genome Shotgun project has been deposited at DDBJ/ENA/GenBank under the accession <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="JAFEMO000000000">JAFEMO000000000</ext-link>. The version described in this paper is version JAFEMO010000000. Genome assembly, repeat and gene annotation, transcriptome, and gene expression profiles could be downloaded and explored online under URL: <ext-link ext-link-type="uri" xlink:href="https://yellowhorn.plantgenie.org/">https://yellowhorn.plantgenie.org/</ext-link>.</p>
</sec>
<sec id="S6">
<title>Author Contributions</title>
<p>J-FM, HL, and WG conceived and designed the study. HL, X-MY, Xin-ruiW, D-XZ, QZ, T-LS, K-HJ, X-CT, S-SZ, R-GZ, Q-ZY, QW, QX, CM, and EV prepared the materials and conducted the experiments. HL, X-MY, and J-FM wrote the manuscript. Xiao-ruW, J-FM, WZ, NS, IP, and YE-K were involved in structuring and polishing the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="conf1" sec-type="COI-statement">
<title>Conflict of Interest</title>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="pudiscl1" sec-type="disclaimer">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<sec id="S7" sec-type="funding-information">
<title>Funding</title>
<p>This study was funded by the National Promotion Project of Scientific and Technological Achievements of the National Forestry and Grassland Administration (No. 2020133101), the Project of Construction of World Class Universities in Beijing Forestry University (No. 2019XKJS0308), the National Natural Science Foundation of China (No. U1903103), and Fundamental Research Funds for the Central Universities (No. 2018BLCB08).</p>
</sec>
<ack>
<p>We are most grateful to Nicolas Delhomme, Bastian Schiffthaler, and Alonso Serrano for their kind help in this study.</p>
</ack>
<sec id="S9" sec-type="supplementary-material">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2021.766389/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2021.766389/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.zip" id="DS1" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Amminger</surname> <given-names>G. P.</given-names></name> <name><surname>Sch&#x00E4;fer</surname> <given-names>M. R.</given-names></name> <name><surname>Klier</surname> <given-names>C. M.</given-names></name> <name><surname>Slavik</surname> <given-names>J. M.</given-names></name> <name><surname>Holzer</surname> <given-names>I.</given-names></name> <name><surname>Holub</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>Decreased nervonic acid levels in erythrocyte membranes predict psychosis in help-seeking ultra-high-risk individuals.</article-title> <source><italic>Mol. Psychiatry</italic></source> <volume>17</volume> <fpage>1150</fpage>&#x2013;<lpage>1152</lpage>. <pub-id pub-id-type="doi">10.1038/mp.2011.167</pub-id> <pub-id pub-id-type="pmid">22182937</pub-id></citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bailey</surname> <given-names>T. L.</given-names></name> <name><surname>Boden</surname> <given-names>M.</given-names></name> <name><surname>Buske</surname> <given-names>F. A.</given-names></name> <name><surname>Frith</surname> <given-names>M.</given-names></name> <name><surname>Grant</surname> <given-names>C. E.</given-names></name> <name><surname>Clementi</surname> <given-names>L.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>MEME Suite: Tools for motif discovery and searching.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>37</volume> <fpage>W202</fpage>&#x2013;<lpage>W208</lpage>.</citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Basinger</surname> <given-names>J. F.</given-names></name> <name><surname>Dilcher</surname> <given-names>D. L.</given-names></name></person-group> (<year>1984</year>). <article-title>Ancient bisexual flowers.</article-title> <source><italic>Science</italic></source> <volume>224</volume> <fpage>511</fpage>&#x2013;<lpage>513</lpage>. <pub-id pub-id-type="doi">10.1126/science.224.4648.511</pub-id> <pub-id pub-id-type="pmid">17753776</pub-id></citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Belser</surname> <given-names>C.</given-names></name> <name><surname>Baurens</surname> <given-names>F.-C.</given-names></name> <name><surname>Noel</surname> <given-names>B.</given-names></name> <name><surname>Martin</surname> <given-names>G.</given-names></name> <name><surname>Cruaud</surname> <given-names>C.</given-names></name> <name><surname>Istace</surname> <given-names>B.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Telomere-to-telomere gapless chromosomes of banana using nanopore sequencing.</article-title> <source><italic>Commun. Biol</italic></source> <volume>4</volume>:<issue>1047</issue>. <pub-id pub-id-type="doi">10.1038/s42003-021-02559-3</pub-id> <pub-id pub-id-type="pmid">34493830</pub-id></citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Benjamini</surname> <given-names>Y.</given-names></name> <name><surname>Hochberg</surname> <given-names>Y.</given-names></name></person-group> (<year>1995</year>). <article-title>Controlling the false discovery rate: A practical and powerful approach to multiple testing.</article-title> <source><italic>J. R. Stat. Soc. Ser. B</italic></source> <volume>57</volume> <fpage>289</fpage>&#x2013;<lpage>300</lpage>.</citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Benson</surname> <given-names>G.</given-names></name></person-group> (<year>1999</year>). <article-title>Tandem repeats finder: A program to analyze DNA sequences.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>27</volume> <fpage>573</fpage>&#x2013;<lpage>580</lpage>. <pub-id pub-id-type="doi">10.1093/nar/27.2.573</pub-id> <pub-id pub-id-type="pmid">9862982</pub-id></citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bi</surname> <given-names>Q.</given-names></name> <name><surname>Zhao</surname> <given-names>Y.</given-names></name> <name><surname>Du</surname> <given-names>W.</given-names></name> <name><surname>Lu</surname> <given-names>Y.</given-names></name> <name><surname>Gui</surname> <given-names>L.</given-names></name> <name><surname>Zheng</surname> <given-names>Z.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Pseudomolecule-level assembly of the Chinese oil tree yellowhorn (<italic>Xanthoceras sorbifolium</italic>) genome.</article-title> <source><italic>GigaScience</italic></source> <volume>8</volume>:<issue>giz070</issue>. <pub-id pub-id-type="doi">10.1093/gigascience/giz070</pub-id> <pub-id pub-id-type="pmid">31241154</pub-id></citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boetzer</surname> <given-names>M.</given-names></name> <name><surname>Henkel</surname> <given-names>C. V.</given-names></name> <name><surname>Jansen</surname> <given-names>H. J.</given-names></name> <name><surname>Butler</surname> <given-names>D.</given-names></name> <name><surname>Pirovano</surname> <given-names>W.</given-names></name></person-group> (<year>2011</year>). <article-title>Scaffolding pre-assembled contigs using SSPACE.</article-title> <source><italic>Bioinformatics</italic></source> <volume>27</volume> <fpage>578</fpage>&#x2013;<lpage>579</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq683</pub-id> <pub-id pub-id-type="pmid">21149342</pub-id></citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boetzer</surname> <given-names>M.</given-names></name> <name><surname>Pirovano</surname> <given-names>W.</given-names></name></person-group> (<year>2014</year>). <article-title>SSPACE-LongRead: Scaffolding bacterial draft genomes using long read sequence information.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>15</volume>:<issue>211</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-15-211</pub-id> <pub-id pub-id-type="pmid">24950923</pub-id></citation></ref>
<ref id="B10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Buchfink</surname> <given-names>B.</given-names></name> <name><surname>Reuter</surname> <given-names>K.</given-names></name> <name><surname>Drost</surname> <given-names>H.-G.</given-names></name></person-group> (<year>2021</year>). <article-title>Sensitive protein alignments at tree-of-life scale using DIAMOND.</article-title> <source><italic>Nat. Methods</italic></source> <volume>18</volume> <fpage>366</fpage>&#x2013;<lpage>368</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-021-01101-x</pub-id> <pub-id pub-id-type="pmid">33828273</pub-id></citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Burge</surname> <given-names>S. W.</given-names></name> <name><surname>Daub</surname> <given-names>J.</given-names></name> <name><surname>Eberhardt</surname> <given-names>R.</given-names></name> <name><surname>Tate</surname> <given-names>J.</given-names></name> <name><surname>Barquist</surname> <given-names>L.</given-names></name> <name><surname>Nawrocki</surname> <given-names>E. P.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>Rfam 11.0: 10 years of RNA families.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>41</volume> <fpage>D226</fpage>&#x2013;<lpage>D232</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gks1005</pub-id> <pub-id pub-id-type="pmid">23125362</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cantarel</surname> <given-names>B. L.</given-names></name> <name><surname>Korf</surname> <given-names>I.</given-names></name> <name><surname>Robb</surname> <given-names>S. M. C.</given-names></name> <name><surname>Parra</surname> <given-names>G.</given-names></name> <name><surname>Ross</surname> <given-names>E.</given-names></name> <name><surname>Moore</surname> <given-names>B.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>MAKER: An easy-to-use annotation pipeline designed for emerging model organism genomes.</article-title> <source><italic>Genome Res.</italic></source> <volume>18</volume> <fpage>188</fpage>&#x2013;<lpage>196</lpage>. <pub-id pub-id-type="doi">10.1101/gr.6743907</pub-id> <pub-id pub-id-type="pmid">18025269</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Capella-Gutierrez</surname> <given-names>S.</given-names></name> <name><surname>Silla-Martinez</surname> <given-names>J. M.</given-names></name> <name><surname>Gabaldon</surname> <given-names>T.</given-names></name></person-group> (<year>2009</year>). <article-title>TrimAl: A tool for automated alignment trimming in large-scale phylogenetic analyses.</article-title> <source><italic>Bioinformatics</italic></source> <volume>25</volume> <fpage>1972</fpage>&#x2013;<lpage>1973</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp348</pub-id> <pub-id pub-id-type="pmid">19505945</pub-id></citation></ref>
<ref id="B14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chae</surname> <given-names>L.</given-names></name> <name><surname>Kim</surname> <given-names>T.</given-names></name> <name><surname>Nilo-Poyanco</surname> <given-names>R.</given-names></name> <name><surname>Rhee</surname> <given-names>S. Y.</given-names></name></person-group> (<year>2014</year>). <article-title>Genomic signatures of specialized metabolism in plants.</article-title> <source><italic>Science</italic></source> <volume>344</volume> <fpage>510</fpage>&#x2013;<lpage>513</lpage>. <pub-id pub-id-type="doi">10.1126/science.1252076</pub-id> <pub-id pub-id-type="pmid">24786077</pub-id></citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>C.</given-names></name> <name><surname>Chen</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Thomas</surname> <given-names>H. R.</given-names></name> <name><surname>Frank</surname> <given-names>M. H.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>TBtools: An integrative toolkit developed for interactive analyses of big biological data.</article-title> <source><italic>Mol. Plant</italic></source> <volume>13</volume> <fpage>1194</fpage>&#x2013;<lpage>1202</lpage>. <pub-id pub-id-type="doi">10.1016/j.molp.2020.06.009</pub-id> <pub-id pub-id-type="pmid">32585190</pub-id></citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cheng</surname> <given-names>Z.</given-names></name> <name><surname>Dong</surname> <given-names>F.</given-names></name> <name><surname>Langdon</surname> <given-names>T.</given-names></name> <name><surname>Ouyang</surname> <given-names>S.</given-names></name> <name><surname>Buell</surname> <given-names>C. R.</given-names></name> <name><surname>Gu</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2002</year>). <article-title>Functional rice centromeres are marked by a satellite repeat and a centromere-specific retrotransposon.</article-title> <source><italic>Plant Cell</italic></source> <volume>14</volume> <fpage>1691</fpage>&#x2013;<lpage>1704</lpage>. <pub-id pub-id-type="doi">10.1105/tpc.003079</pub-id> <pub-id pub-id-type="pmid">12172016</pub-id></citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>&#x010C;&#x00ED;&#x017E;kov&#x00E1;</surname> <given-names>J.</given-names></name> <name><surname>H&#x0159;ibov&#x00E1;</surname> <given-names>E.</given-names></name> <name><surname>Humpl&#x00ED;kov&#x00E1;</surname> <given-names>L.</given-names></name> <name><surname>Christelov&#x00E1;</surname> <given-names>P.</given-names></name> <name><surname>Such&#x00E1;nkov&#x00E1;</surname> <given-names>P.</given-names></name> <name><surname>Dole&#x017E;el</surname> <given-names>J.</given-names></name></person-group> (<year>2013</year>). <article-title>Molecular analysis and genomic organization of major DNA satellites in Banana (<italic>Musa</italic> spp.).</article-title> <source><italic>PLoS One</italic></source> <volume>8</volume>:<issue>e54808</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0054808</pub-id> <pub-id pub-id-type="pmid">23372772</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Copenhaver</surname> <given-names>G. P.</given-names></name> <name><surname>Nickel</surname> <given-names>K.</given-names></name> <name><surname>Kuromori</surname> <given-names>T.</given-names></name> <name><surname>Benito</surname> <given-names>M.-I.</given-names></name> <name><surname>Kaul</surname> <given-names>S.</given-names></name> <name><surname>Lin</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>1999</year>). <article-title>Genetic definition and sequence analysis of <italic>Arabidopsis</italic> centromeres.</article-title> <source><italic>Science</italic></source> <volume>286</volume> <fpage>2468</fpage>&#x2013;<lpage>2474</lpage>. <pub-id pub-id-type="doi">10.1126/science.286.5449.2468</pub-id> <pub-id pub-id-type="pmid">10617454</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Costaglioli</surname> <given-names>P.</given-names></name> <name><surname>Joub&#x00E8;s</surname> <given-names>J.</given-names></name> <name><surname>Garcia</surname> <given-names>C.</given-names></name> <name><surname>Stef</surname> <given-names>M.</given-names></name> <name><surname>Arveiler</surname> <given-names>B.</given-names></name> <name><surname>Lessire</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2005</year>). <article-title>Profiling candidate genes involved in wax biosynthesis in <italic>Arabidopsis thaliana</italic> by microarray analysis.</article-title> <source><italic>Biochim. Biophys. Acta Mol. Cell Biol. Lipids</italic></source> <volume>1734</volume> <fpage>247</fpage>&#x2013;<lpage>258</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbalip.2005.04.002</pub-id> <pub-id pub-id-type="pmid">15914083</pub-id></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Csink</surname> <given-names>A. K.</given-names></name> <name><surname>Henikoff</surname> <given-names>S.</given-names></name></person-group> (<year>1998</year>). <article-title>Something from nothing: the evolution and utility of satellite repeats.</article-title> <source><italic>Trends Genet.</italic></source> <volume>14</volume> <fpage>200</fpage>&#x2013;<lpage>204</lpage>. <pub-id pub-id-type="doi">10.1016/s0168-9525(98)01444-9</pub-id></citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dai</surname> <given-names>X.</given-names></name> <name><surname>Sinharoy</surname> <given-names>S.</given-names></name> <name><surname>Udvardi</surname> <given-names>M.</given-names></name> <name><surname>Zhao</surname> <given-names>P. X.</given-names></name></person-group> (<year>2013</year>). <article-title>PlantTFcat: An online plant transcription factor and transcriptional regulator categorization and analysis tool.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>14</volume>:<issue>321</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-14-321</pub-id> <pub-id pub-id-type="pmid">24219505</pub-id></citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>D&#x2019;Hont</surname> <given-names>A.</given-names></name> <name><surname>Denoeud</surname> <given-names>F.</given-names></name> <name><surname>Aury</surname> <given-names>J.-M.</given-names></name> <name><surname>Baurens</surname> <given-names>F.-C.</given-names></name> <name><surname>Carreel</surname> <given-names>F.</given-names></name> <name><surname>Garsmeur</surname> <given-names>O.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>The banana (<italic>Musa acuminata</italic>) genome and the evolution of monocotyledonous plants.</article-title> <source><italic>Nature</italic></source> <volume>488</volume> <fpage>213</fpage>&#x2013;<lpage>217</lpage>. <pub-id pub-id-type="doi">10.1038/nature11241</pub-id> <pub-id pub-id-type="pmid">22801500</pub-id></citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Doyle</surname> <given-names>J. J.</given-names></name> <name><surname>Doyle</surname> <given-names>J. L.</given-names></name></person-group> (<year>1987</year>). <article-title>A rapid DNA isolation procedure for small quantities of fresh leaf tissue.</article-title> <source><italic>Phytochem. Bull.</italic></source> <volume>19</volume> <fpage>11</fpage>&#x2013;<lpage>15</lpage>.</citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dudchenko</surname> <given-names>O.</given-names></name> <name><surname>Batra</surname> <given-names>S. S.</given-names></name> <name><surname>Omer</surname> <given-names>A. D.</given-names></name> <name><surname>Nyquist</surname> <given-names>S. K.</given-names></name> <name><surname>Hoeger</surname> <given-names>M.</given-names></name> <name><surname>Durand</surname> <given-names>N. C.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title><italic>De novo</italic> assembly of the <italic>Aedes aegypti</italic> genome using Hi-C yields chromosome-length scaffolds.</article-title> <source><italic>Science</italic></source> <volume>356</volume> <fpage>92</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1126/science.aal3327</pub-id> <pub-id pub-id-type="pmid">28336562</pub-id></citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Durand</surname> <given-names>N. C.</given-names></name> <name><surname>Shamim</surname> <given-names>M. S.</given-names></name> <name><surname>Machol</surname> <given-names>I.</given-names></name> <name><surname>Rao</surname> <given-names>S. S.</given-names></name> <name><surname>Huntley</surname> <given-names>M. H.</given-names></name> <name><surname>Lander</surname> <given-names>E. S.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Juicer provides a one-click system for analyzing loop-resolution Hi-C experiments.</article-title> <source><italic>Cell Syst.</italic></source> <volume>3</volume> <fpage>95</fpage>&#x2013;<lpage>98</lpage>. <pub-id pub-id-type="doi">10.1016/j.cels.2016.07.002</pub-id> <pub-id pub-id-type="pmid">27467249</pub-id></citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Edgar</surname> <given-names>R. C.</given-names></name></person-group> (<year>2004</year>). <article-title>MUSCLE: Multiple sequence alignment with high accuracy and high throughput.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>32</volume> <fpage>1792</fpage>&#x2013;<lpage>1797</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkh340</pub-id> <pub-id pub-id-type="pmid">15034147</pub-id></citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ellinghaus</surname> <given-names>D.</given-names></name> <name><surname>Kurtz</surname> <given-names>S.</given-names></name> <name><surname>Willhoeft</surname> <given-names>U.</given-names></name></person-group> (<year>2008</year>). <article-title>LTRharvest, an efficient and flexible software for <italic>de novo</italic> detection of LTR retrotransposons.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>9</volume>:<issue>18</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-9-18</pub-id> <pub-id pub-id-type="pmid">18194517</pub-id></citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Emms</surname> <given-names>D. M.</given-names></name> <name><surname>Kelly</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>OrthoFinder: phylogenetic orthology inference for comparative genomics.</article-title> <source><italic>Genome Biol.</italic></source> <volume>20</volume>:<issue>238</issue>. <pub-id pub-id-type="doi">10.1186/s13059-019-1832-y</pub-id> <pub-id pub-id-type="pmid">31727128</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Feng</surname> <given-names>S.</given-names></name> <name><surname>Cokus</surname> <given-names>S. J.</given-names></name> <name><surname>Schubert</surname> <given-names>V.</given-names></name> <name><surname>Zhai</surname> <given-names>J.</given-names></name> <name><surname>Pellegrini</surname> <given-names>M.</given-names></name> <name><surname>Jacobsen</surname> <given-names>S. E.</given-names></name></person-group> (<year>2014</year>). <article-title>Genome-wide Hi-C analyses in wild-type and mutants reveal high-resolution chromatin interactions in <italic>Arabidopsis</italic>.</article-title> <source><italic>Mol. Cell</italic></source> <volume>55</volume> <fpage>694</fpage>&#x2013;<lpage>707</lpage>. <pub-id pub-id-type="doi">10.1016/j.molcel.2014.07.008</pub-id> <pub-id pub-id-type="pmid">25132175</pub-id></citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fernando</surname> <given-names>C.</given-names></name> <name><surname>Irene</surname> <given-names>J.</given-names></name> <name><surname>J&#x00E8;ssica</surname> <given-names>G. G.</given-names></name> <name><surname>Damian</surname> <given-names>L.</given-names></name> <name><surname>Marina</surname> <given-names>M. H.</given-names></name> <name><surname>Emilio</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Genome sequence of the olive tree, <italic>Olea europaea</italic>.</article-title> <source><italic>GigaScience</italic></source> <volume>5</volume>:<issue>29</issue>.</citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Feschotte</surname> <given-names>C.</given-names></name> <name><surname>Jiang</surname> <given-names>N.</given-names></name> <name><surname>Wessler</surname> <given-names>S. R.</given-names></name></person-group> (<year>2002</year>). <article-title>Plant transposable elements: where genetics meets genomics.</article-title> <source><italic>Nat. Rev. Genet.</italic></source> <volume>3</volume> <fpage>329</fpage>&#x2013;<lpage>341</lpage>. <pub-id pub-id-type="doi">10.1038/nrg793</pub-id> <pub-id pub-id-type="pmid">11988759</pub-id></citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Finn</surname> <given-names>R. D.</given-names></name> <name><surname>Mistry</surname> <given-names>J.</given-names></name> <name><surname>Tate</surname> <given-names>J.</given-names></name> <name><surname>Coggill</surname> <given-names>P.</given-names></name> <name><surname>Heger</surname> <given-names>A.</given-names></name> <name><surname>Pollington</surname> <given-names>J. E.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>The Pfam protein families database.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>38</volume> <fpage>D211</fpage>&#x2013;<lpage>D222</lpage>.</citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fu</surname> <given-names>L.</given-names></name> <name><surname>Niu</surname> <given-names>B.</given-names></name> <name><surname>Zhu</surname> <given-names>Z.</given-names></name> <name><surname>Wu</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name></person-group> (<year>2012</year>). <article-title>CD-HIT: Accelerated for clustering the next-generation sequencing data.</article-title> <source><italic>Bioinformatics</italic></source> <volume>28</volume> <fpage>3150</fpage>&#x2013;<lpage>3152</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bts565</pub-id> <pub-id pub-id-type="pmid">23060610</pub-id></citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goel</surname> <given-names>M.</given-names></name> <name><surname>Sun</surname> <given-names>H.</given-names></name> <name><surname>Jiao</surname> <given-names>W.-B.</given-names></name> <name><surname>Schneeberger</surname> <given-names>K.</given-names></name></person-group> (<year>2019</year>). <article-title>SyRI: Finding genomic rearrangements and local sequence differences from whole-genome assemblies.</article-title> <source><italic>Genome Biol.</italic></source> <volume>20</volume>:<issue>277</issue>. <pub-id pub-id-type="doi">10.1186/s13059-019-1911-0</pub-id> <pub-id pub-id-type="pmid">31842948</pub-id></citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gonz&#x00E1;lez-Mellado</surname> <given-names>D.</given-names></name> <name><surname>Salas</surname> <given-names>J. J.</given-names></name> <name><surname>Venegas-Caler&#x00F3;n</surname> <given-names>M.</given-names></name> <name><surname>Moreno-P&#x00E9;rez</surname> <given-names>A. J.</given-names></name> <name><surname>Garc&#x00E9;s</surname> <given-names>R.</given-names></name> <name><surname>Mart&#x00ED;nez-Force</surname> <given-names>E.</given-names></name></person-group> (<year>2019</year>). <article-title>Functional characterization and structural modelling of <italic>Helianthus annuus</italic> (sunflower) ketoacyl-CoA synthases and their role in seed oil composition.</article-title> <source><italic>Planta</italic></source> <volume>249</volume> <fpage>1823</fpage>&#x2013;<lpage>1836</lpage>. <pub-id pub-id-type="doi">10.1007/s00425-019-03126-1</pub-id> <pub-id pub-id-type="pmid">30847571</pub-id></citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grabherr</surname> <given-names>M. G.</given-names></name> <name><surname>Haas</surname> <given-names>B. J.</given-names></name> <name><surname>Yassour</surname> <given-names>M.</given-names></name> <name><surname>Levin</surname> <given-names>J. Z.</given-names></name> <name><surname>Thompson</surname> <given-names>D. A.</given-names></name> <name><surname>Amit</surname> <given-names>I.</given-names></name><etal/></person-group> (<year>2011</year>). <article-title>Full-length transcriptome assembly from RNA-Seq data without a reference genome.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>29</volume> <fpage>644</fpage>&#x2013;<lpage>652</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.1883</pub-id> <pub-id pub-id-type="pmid">21572440</pub-id></citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>Y.</given-names></name> <name><surname>Mietkiewska</surname> <given-names>E.</given-names></name> <name><surname>Francis</surname> <given-names>T.</given-names></name> <name><surname>Katavic</surname> <given-names>V.</given-names></name> <name><surname>Brost</surname> <given-names>J. M.</given-names></name> <name><surname>Giblin</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>Increase in nervonic acid content in transformed yeast and transgenic plants by introduction of a <italic>Lunaria annua</italic> L. 3-ketoacyl-CoA synthase (KCS) gene.</article-title> <source><italic>Plant Mol. Biol.</italic></source> <volume>69</volume> <fpage>565</fpage>&#x2013;<lpage>575</lpage>. <pub-id pub-id-type="doi">10.1007/s11103-008-9439-9</pub-id> <pub-id pub-id-type="pmid">19082744</pub-id></citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Han</surname> <given-names>M. V.</given-names></name> <name><surname>Thomas</surname> <given-names>G. W. C.</given-names></name> <name><surname>Lugo-Martinez</surname> <given-names>J.</given-names></name> <name><surname>Hahn</surname> <given-names>M. W.</given-names></name></person-group> (<year>2013</year>). <article-title>Estimating gene gain and loss rates in the presence of error in genome assembly and annotation using CAFE 3.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>30</volume> <fpage>1987</fpage>&#x2013;<lpage>1997</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mst100</pub-id> <pub-id pub-id-type="pmid">23709260</pub-id></citation></ref>
<ref id="B39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haslam</surname> <given-names>T. M.</given-names></name> <name><surname>Kunst</surname> <given-names>L.</given-names></name></person-group> (<year>2013</year>). <article-title>Extending the story of very-long-chain fatty acid elongation.</article-title> <source><italic>Plant Sci.</italic></source> <volume>210</volume> <fpage>93</fpage>&#x2013;<lpage>107</lpage>. <pub-id pub-id-type="doi">10.1016/j.plantsci.2013.05.008</pub-id> <pub-id pub-id-type="pmid">23849117</pub-id></citation></ref>
<ref id="B40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Henikoff</surname> <given-names>S.</given-names></name> <name><surname>Ahmad</surname> <given-names>K.</given-names></name> <name><surname>Malik</surname> <given-names>H. S.</given-names></name></person-group> (<year>2001</year>). <article-title>The centromere paradox: Stable inheritance with rapidly evolving DNA.</article-title> <source><italic>Science</italic></source> <volume>293</volume> <fpage>1098</fpage>&#x2013;<lpage>1102</lpage>. <pub-id pub-id-type="doi">10.1126/science.1062939</pub-id> <pub-id pub-id-type="pmid">11498581</pub-id></citation></ref>
<ref id="B41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Houben</surname> <given-names>A.</given-names></name> <name><surname>Schubert</surname> <given-names>I.</given-names></name></person-group> (<year>2003</year>). <article-title>DNA and proteins of plant centromeres.</article-title> <source><italic>Curr. Opin. Plant Biol.</italic></source> <volume>6</volume> <fpage>554</fpage>&#x2013;<lpage>560</lpage>. <pub-id pub-id-type="doi">10.1016/j.pbi.2003.09.007</pub-id> <pub-id pub-id-type="pmid">14611953</pub-id></citation></ref>
<ref id="B42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ivancevic</surname> <given-names>A. M.</given-names></name> <name><surname>Kortschak</surname> <given-names>R. D.</given-names></name> <name><surname>Bertozzi</surname> <given-names>T.</given-names></name> <name><surname>Adelson</surname> <given-names>D. L.</given-names></name></person-group> (<year>2016</year>). <article-title>LINEs between species: Evolutionary dynamics of LINE-1 retrotransposons across the Eukaryotic tree of life.</article-title> <source><italic>Genome Biol. Evol.</italic></source> <volume>8</volume> <fpage>3301</fpage>&#x2013;<lpage>3322</lpage>. <pub-id pub-id-type="doi">10.1093/gbe/evw243</pub-id> <pub-id pub-id-type="pmid">27702814</pub-id></citation></ref>
<ref id="B43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jensen</surname> <given-names>L. J.</given-names></name> <name><surname>Julien</surname> <given-names>P.</given-names></name> <name><surname>Kuhn</surname> <given-names>M.</given-names></name> <name><surname>Von Mering</surname> <given-names>C.</given-names></name> <name><surname>Muller</surname> <given-names>J.</given-names></name> <name><surname>Doerks</surname> <given-names>T.</given-names></name><etal/></person-group> (<year>2007</year>). <article-title>eggNOG: Automated construction and annotation of orthologous groups of genes.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>36</volume> <fpage>D250</fpage>&#x2013;<lpage>D254</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkm796</pub-id> <pub-id pub-id-type="pmid">17942413</pub-id></citation></ref>
<ref id="B44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jones</surname> <given-names>P.</given-names></name> <name><surname>Binns</surname> <given-names>D.</given-names></name> <name><surname>Chang</surname> <given-names>H. Y.</given-names></name> <name><surname>Fraser</surname> <given-names>M.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Mcanulla</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>InterProScan 5: Genome-scale protein function classification.</article-title> <source><italic>Bioinformatics</italic></source> <volume>30</volume> <fpage>1236</fpage>&#x2013;<lpage>1240</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu031</pub-id> <pub-id pub-id-type="pmid">24451626</pub-id></citation></ref>
<ref id="B45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Joub&#x00E8;s</surname> <given-names>J.</given-names></name> <name><surname>Raffaele</surname> <given-names>S.</given-names></name> <name><surname>Bourdenx</surname> <given-names>B.</given-names></name> <name><surname>Garcia</surname> <given-names>C.</given-names></name> <name><surname>Laroche-Traineau</surname> <given-names>J.</given-names></name> <name><surname>Moreau</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>The VLCFA elongase gene family in <italic>Arabidopsis thaliana</italic>: phylogenetic analysis, 3D modelling and expression profiling.</article-title> <source><italic>Plant Mol. Biol.</italic></source> <volume>67</volume> <fpage>547</fpage>&#x2013;<lpage>566</lpage>. <pub-id pub-id-type="doi">10.1007/s11103-008-9339-z</pub-id> <pub-id pub-id-type="pmid">18465198</pub-id></citation></ref>
<ref id="B46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kalyaanamoorthy</surname> <given-names>S.</given-names></name> <name><surname>Minh</surname> <given-names>B. Q.</given-names></name> <name><surname>Wong</surname> <given-names>T. K. F.</given-names></name> <name><surname>Von Haeseler</surname> <given-names>A.</given-names></name> <name><surname>Jermiin</surname> <given-names>L. S.</given-names></name></person-group> (<year>2017</year>). <article-title>ModelFinder: Fast model selection for accurate phylogenetic estimates.</article-title> <source><italic>Nat. Methods</italic></source> <volume>14</volume> <fpage>587</fpage>&#x2013;<lpage>589</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.4285</pub-id> <pub-id pub-id-type="pmid">28481363</pub-id></citation></ref>
<ref id="B47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Karp</surname> <given-names>P. D.</given-names></name> <name><surname>Latendresse</surname> <given-names>M.</given-names></name> <name><surname>Paley</surname> <given-names>S. M.</given-names></name> <name><surname>Krummenacker</surname> <given-names>M.</given-names></name> <name><surname>Ong</surname> <given-names>Q. D.</given-names></name> <name><surname>Billington</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Pathway tools version 19.0 update: Software for pathway/genome informatics and systems biology.</article-title> <source><italic>Brief. Bioinform.</italic></source> <volume>17</volume> <fpage>877</fpage>&#x2013;<lpage>890</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbv079</pub-id> <pub-id pub-id-type="pmid">26454094</pub-id></citation></ref>
<ref id="B48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Katoh</surname> <given-names>K.</given-names></name> <name><surname>Standley</surname> <given-names>D. M.</given-names></name></person-group> (<year>2013</year>). <article-title>MAFFT multiple sequence alignment software version 7: Improvements in performance and usability.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>30</volume> <fpage>772</fpage>&#x2013;<lpage>780</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mst010</pub-id> <pub-id pub-id-type="pmid">23329690</pub-id></citation></ref>
<ref id="B49"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Keller</surname> <given-names>O.</given-names></name> <name><surname>Kollmar</surname> <given-names>M.</given-names></name> <name><surname>Stanke</surname> <given-names>M.</given-names></name> <name><surname>Waack</surname> <given-names>S.</given-names></name></person-group> (<year>2011</year>). <article-title>A novel hybrid gene prediction method employing protein multiple sequence alignments.</article-title> <source><italic>Bioinformatics</italic></source> <volume>27</volume> <fpage>757</fpage>&#x2013;<lpage>763</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr010</pub-id> <pub-id pub-id-type="pmid">21216780</pub-id></citation></ref>
<ref id="B50"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>D.</given-names></name> <name><surname>Langmead</surname> <given-names>B.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2015</year>). <article-title>HISAT: A fast spliced aligner with low memory requirements.</article-title> <source><italic>Nat. Methods</italic></source> <volume>12</volume> <fpage>357</fpage>&#x2013;<lpage>360</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.3317</pub-id> <pub-id pub-id-type="pmid">25751142</pub-id></citation></ref>
<ref id="B51"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kimura</surname> <given-names>M.</given-names></name></person-group> (<year>1980</year>). <article-title>A simple method for estimating evolutionary rates of base substitutions through comparative studies of nucleotide sequences.</article-title> <source><italic>J. Mol. Evol.</italic></source> <volume>16</volume> <fpage>111</fpage>&#x2013;<lpage>120</lpage>.</citation></ref>
<ref id="B52"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kumar</surname> <given-names>A.</given-names></name> <name><surname>Bennetzen</surname> <given-names>J. L.</given-names></name></person-group> (<year>1999</year>). <article-title>Plant retrotransposons.</article-title> <source><italic>Annu. Rev. Genet.</italic></source> <volume>33</volume> <fpage>479</fpage>&#x2013;<lpage>532</lpage>.</citation></ref>
<ref id="B53"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lagesen</surname> <given-names>K.</given-names></name> <name><surname>Hallin</surname> <given-names>P.</given-names></name> <name><surname>R&#x00F8;dland</surname> <given-names>E. A.</given-names></name> <name><surname>St&#x00E6;rfeldt</surname> <given-names>H.-H.</given-names></name> <name><surname>Rognes</surname> <given-names>T.</given-names></name> <name><surname>Ussery</surname> <given-names>D. W.</given-names></name></person-group> (<year>2007</year>). <article-title>RNAmmer: Consistent and rapid annotation of ribosomal RNA genes.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>35</volume> <fpage>3100</fpage>&#x2013;<lpage>3108</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkm160</pub-id> <pub-id pub-id-type="pmid">17452365</pub-id></citation></ref>
<ref id="B54"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lang</surname> <given-names>K.</given-names></name> <name><surname>Ma</surname> <given-names>L.</given-names></name> <name><surname>Liu</surname> <given-names>W.</given-names></name> <name><surname>Tong</surname> <given-names>C.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name></person-group> (<year>1980</year>). <article-title>Chromosome number of <italic>Xanthoceras sorbifolium</italic>.</article-title> <source><italic>For. Sci. Technol.</italic></source> <volume>2013</volume> <fpage>9</fpage>&#x2013;<lpage>10</lpage>.</citation></ref>
<ref id="B55"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Langmead</surname> <given-names>B.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2012</year>). <article-title>Fast gapped-read alignment with Bowtie 2.</article-title> <source><italic>Nat. Methods</italic></source> <volume>9</volume> <fpage>357</fpage>&#x2013;<lpage>359</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id> <pub-id pub-id-type="pmid">22388286</pub-id></citation></ref>
<ref id="B56"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Letunic</surname> <given-names>I.</given-names></name> <name><surname>Khedkar</surname> <given-names>S.</given-names></name> <name><surname>Bork</surname> <given-names>P.</given-names></name></person-group> (<year>2020</year>). <article-title>SMART: Recent updates, new developments and status in 2020.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>49</volume> <fpage>D458</fpage>&#x2013;<lpage>D460</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa937</pub-id> <pub-id pub-id-type="pmid">33104802</pub-id></citation></ref>
<ref id="B57"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Levan</surname> <given-names>A.</given-names></name> <name><surname>Fredga</surname> <given-names>K.</given-names></name> <name><surname>Sandberg</surname> <given-names>A. A.</given-names></name></person-group> (<year>1964</year>). <article-title>Nomenclature for centromeric position on chromosomes.</article-title> <source><italic>Hereditas</italic></source> <volume>52</volume> <fpage>201</fpage>&#x2013;<lpage>220</lpage>.</citation></ref>
<ref id="B58"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Levy</surname> <given-names>O.</given-names></name> <name><surname>Knisbacher</surname> <given-names>B. A.</given-names></name> <name><surname>Levanon</surname> <given-names>E. Y.</given-names></name> <name><surname>Havlin</surname> <given-names>S.</given-names></name></person-group> (<year>2017</year>). <article-title>Integrating networks and comparative genomics reveals retroelement proliferation dynamics in hominid genomes.</article-title> <source><italic>Sci. Adv.</italic></source> <volume>3</volume>:<issue>e1701256</issue>. <pub-id pub-id-type="doi">10.1126/sciadv.1701256</pub-id> <pub-id pub-id-type="pmid">29043294</pub-id></citation></ref>
<ref id="B59"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>Minimap2: Pairwise alignment for nucleotide sequences.</article-title> <source><italic>Bioinformatics</italic></source> <volume>34</volume> <fpage>3094</fpage>&#x2013;<lpage>3100</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty191</pub-id> <pub-id pub-id-type="pmid">29750242</pub-id></citation></ref>
<ref id="B60"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Handsaker</surname> <given-names>B.</given-names></name> <name><surname>Wysoker</surname> <given-names>A.</given-names></name> <name><surname>Fennell</surname> <given-names>T.</given-names></name> <name><surname>Ruan</surname> <given-names>J.</given-names></name> <name><surname>Homer</surname> <given-names>N.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools.</article-title> <source><italic>Bioinformatics</italic></source> <volume>25</volume> <fpage>2078</fpage>&#x2013;<lpage>2079</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id> <pub-id pub-id-type="pmid">19505943</pub-id></citation></ref>
<ref id="B61"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Stoeckert</surname> <given-names>C. J.</given-names></name> <name><surname>Roos</surname> <given-names>D. S.</given-names></name></person-group> (<year>2003</year>). <article-title>OrthoMCL: Identification of ortholog groups for eukaryotic genomes.</article-title> <source><italic>Genome Res.</italic></source> <volume>13</volume> <fpage>2178</fpage>&#x2013;<lpage>2189</lpage>.</citation></ref>
<ref id="B62"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liang</surname> <given-names>Q.</given-names></name> <name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Yuan</surname> <given-names>F.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name> <name><surname>Duan</surname> <given-names>Q.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>The genome assembly and annotation of yellowhorn (<italic>Xanthoceras sorbifolium</italic> Bunge).</article-title> <source><italic>GigaScience</italic></source> <volume>8</volume>:<issue>giz071</issue>. <pub-id pub-id-type="doi">10.1093/gigascience/giz071</pub-id> <pub-id pub-id-type="pmid">31241155</pub-id></citation></ref>
<ref id="B63"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lieberman-Aiden</surname> <given-names>E.</given-names></name> <name><surname>Van Berkum</surname> <given-names>N. L.</given-names></name> <name><surname>Williams</surname> <given-names>L.</given-names></name> <name><surname>Imakaev</surname> <given-names>M.</given-names></name> <name><surname>Ragoczy</surname> <given-names>T.</given-names></name> <name><surname>Telling</surname> <given-names>A.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>Comprehensive mapping of long-range interactions reveals folding principles of the human genome.</article-title> <source><italic>Science</italic></source> <volume>326</volume> <fpage>289</fpage>&#x2013;<lpage>293</lpage>. <pub-id pub-id-type="doi">10.1126/science.1181369</pub-id> <pub-id pub-id-type="pmid">19815776</pub-id></citation></ref>
<ref id="B64"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>Y.</given-names></name> <name><surname>Min</surname> <given-names>J.</given-names></name> <name><surname>Lai</surname> <given-names>R.</given-names></name> <name><surname>Wu</surname> <given-names>Z.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Yu</surname> <given-names>L.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Genome-wide sequencing of longan (<italic>Dimocarpus longan</italic> Lour.) provides insights into molecular basis of its polyphenol-rich characteristics.</article-title> <source><italic>GigaScience</italic></source> <volume>6</volume> <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1093/gigascience/gix023</pub-id> <pub-id pub-id-type="pmid">28368449</pub-id></citation></ref>
<ref id="B65"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>B.</given-names></name> <name><surname>Shi</surname> <given-names>Y.</given-names></name> <name><surname>Yuan</surname> <given-names>J.</given-names></name> <name><surname>Hu</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>N.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>Estimation of genomic characteristics by analyzing k-mer frequency in <italic>de novo</italic> genome projects.</article-title> <source><italic>arXiv</italic> [Preprint]</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1308.2012">https://arxiv.org/abs/1308.2012</ext-link> <comment>(accessed October 01, 2019)</comment></citation></ref>
<ref id="B66"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Wu</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>A.</given-names></name> <name><surname>Ruan</surname> <given-names>J.</given-names></name></person-group> (<year>2021</year>). <article-title>SMARTdenovo: A de novo assembler using long noisy reads.</article-title> <source><italic>Gigabyte</italic></source>. <pub-id pub-id-type="doi">10.46471/gigabyte.15</pub-id>.</citation></ref>
<ref id="B67"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Yue</surname> <given-names>W.</given-names></name> <name><surname>Li</surname> <given-names>D.</given-names></name> <name><surname>Wang</surname> <given-names>R. R. C.</given-names></name> <name><surname>Kong</surname> <given-names>X.</given-names></name> <name><surname>Lu</surname> <given-names>K.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>Structure and dynamics of retrotransposons at wheat centromeres and pericentromeres.</article-title> <source><italic>Chromosoma</italic></source> <volume>117</volume> <fpage>445</fpage>&#x2013;<lpage>456</lpage>. <pub-id pub-id-type="doi">10.1007/s00412-008-0161-9</pub-id> <pub-id pub-id-type="pmid">18496705</pub-id></citation></ref>
<ref id="B68"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lowe</surname> <given-names>T. M.</given-names></name> <name><surname>Eddy</surname> <given-names>S. R.</given-names></name></person-group> (<year>1997</year>). <article-title>tRNAscan-SE: A program for improved detection of transfer RNA genes in genomic sequence.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>25</volume> <fpage>955</fpage>&#x2013;<lpage>964</lpage>. <pub-id pub-id-type="doi">10.1093/nar/25.5.955</pub-id> <pub-id pub-id-type="pmid">9023104</pub-id></citation></ref>
<ref id="B69"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Chitsaz</surname> <given-names>F.</given-names></name> <name><surname>Derbyshire</surname> <given-names>M. K.</given-names></name> <name><surname>Geer</surname> <given-names>R. C.</given-names></name> <name><surname>Gonzales</surname> <given-names>N. R.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>CDD/SPARCLE: The conserved domain database in 2020.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>48</volume> <fpage>D265</fpage>&#x2013;<lpage>D268</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz991</pub-id> <pub-id pub-id-type="pmid">31777944</pub-id></citation></ref>
<ref id="B70"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>R.</given-names></name> <name><surname>Liu</surname> <given-names>B.</given-names></name> <name><surname>Xie</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Huang</surname> <given-names>W.</given-names></name> <name><surname>Yuan</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>SOAPdenovo2: An empirically improved memory-efficient short-read <italic>de novo</italic> assembler.</article-title> <source><italic>GigaScience</italic></source> <volume>1</volume>:<issue>18</issue>. <pub-id pub-id-type="doi">10.1186/2047-217X-1-18</pub-id></citation></ref>
<ref id="B71"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lyu</surname> <given-names>H.</given-names></name> <name><surname>He</surname> <given-names>Z.</given-names></name> <name><surname>Wu</surname> <given-names>C. I.</given-names></name> <name><surname>Shi</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>Convergent adaptive evolution in marginal environments: Unloading transposable elements as a common strategy among mangrove genomes.</article-title> <source><italic>New Phytol.</italic></source> <volume>217</volume> <fpage>428</fpage>&#x2013;<lpage>438</lpage>. <pub-id pub-id-type="doi">10.1111/nph.14784</pub-id> <pub-id pub-id-type="pmid">28960318</pub-id></citation></ref>
<ref id="B72"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>J.</given-names></name> <name><surname>Devos</surname> <given-names>K. M.</given-names></name> <name><surname>Bennetzen</surname> <given-names>J. L.</given-names></name></person-group> (<year>2004</year>). <article-title>Analyses of LTR-retrotransposon structures reveal recent and rapid genomic DNA loss in rice.</article-title> <source><italic>Genome Res.</italic></source> <volume>14</volume> <fpage>860</fpage>&#x2013;<lpage>869</lpage>. <pub-id pub-id-type="doi">10.1101/gr.1466204</pub-id> <pub-id pub-id-type="pmid">15078861</pub-id></citation></ref>
<ref id="B73"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Magall&#x00F3;n</surname> <given-names>S.</given-names></name> <name><surname>G&#x00F3;mez-Acevedo</surname> <given-names>S.</given-names></name> <name><surname>S&#x00E1;nchez-Reyes</surname> <given-names>L. L.</given-names></name> <name><surname>Hern&#x00E1;ndez-Hern&#x00E1;ndez</surname> <given-names>T.</given-names></name></person-group> (<year>2015</year>). <article-title>A metacalibrated time-tree documents the early rise of flowering plant phylogenetic diversity.</article-title> <source><italic>New Phytol.</italic></source> <volume>207</volume> <fpage>437</fpage>&#x2013;<lpage>453</lpage>. <pub-id pub-id-type="doi">10.1111/nph.13264</pub-id> <pub-id pub-id-type="pmid">25615647</pub-id></citation></ref>
<ref id="B74"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marcais</surname> <given-names>G.</given-names></name> <name><surname>Kingsford</surname> <given-names>C.</given-names></name></person-group> (<year>2011</year>). <article-title>A fast, lock-free approach for efficient parallel counting of occurrences of k-mers.</article-title> <source><italic>Bioinformatics</italic></source> <volume>27</volume> <fpage>764</fpage>&#x2013;<lpage>770</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr011</pub-id> <pub-id pub-id-type="pmid">21217122</pub-id></citation></ref>
<ref id="B75"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Martin</surname> <given-names>M.</given-names></name></person-group> (<year>2011</year>). <article-title>Cutadapt removes adapter sequences from high-throughput sequencing reads.</article-title> <source><italic>EMBnet J.</italic></source> <volume>17</volume> <fpage>10</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.14806/ej.17.1.200</pub-id></citation></ref>
<ref id="B76"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>McKinley</surname> <given-names>K. L.</given-names></name> <name><surname>Cheeseman</surname> <given-names>I. M.</given-names></name></person-group> (<year>2016</year>). <article-title>The molecular basis for centromere identity and function.</article-title> <source><italic>Nat. Rev. Mol. Cell Biol.</italic></source> <volume>17</volume> <fpage>16</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1038/nrm.2015.5</pub-id> <pub-id pub-id-type="pmid">26601620</pub-id></citation></ref>
<ref id="B77"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Melters</surname> <given-names>D. P.</given-names></name> <name><surname>Bradnam</surname> <given-names>K. R.</given-names></name> <name><surname>Young</surname> <given-names>H. A.</given-names></name> <name><surname>Telis</surname> <given-names>N.</given-names></name> <name><surname>May</surname> <given-names>M. R.</given-names></name> <name><surname>Ruby</surname> <given-names>J. G.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>Comparative analysis of tandem repeats from hundreds of species reveals unique insights into centromere evolution.</article-title> <source><italic>Genome Biol.</italic></source> <volume>14</volume>:<issue>R10</issue>. <pub-id pub-id-type="doi">10.1186/gb-2013-14-1-r10</pub-id> <pub-id pub-id-type="pmid">23363705</pub-id></citation></ref>
<ref id="B78"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Miele</surname> <given-names>V.</given-names></name> <name><surname>Penel</surname> <given-names>S.</given-names></name> <name><surname>Duret</surname> <given-names>L.</given-names></name></person-group> (<year>2011</year>). <article-title>Ultra-fast sequence clustering from similarity networks with SiLiX.</article-title> <source><italic>BMC Bioinform.</italic></source> <volume>12</volume>:<issue>116</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-12-116</pub-id> <pub-id pub-id-type="pmid">21513511</pub-id></citation></ref>
<ref id="B79"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Millar</surname> <given-names>A. A.</given-names></name> <name><surname>Kunst</surname> <given-names>L.</given-names></name></person-group> (<year>1997</year>). <article-title>Very-long-chain fatty acid biosynthesis is controlled through the expression and specificity of the condensing enzyme.</article-title> <source><italic>Plant J.</italic></source> <volume>12</volume> <fpage>121</fpage>&#x2013;<lpage>131</lpage>. <pub-id pub-id-type="doi">10.1046/j.1365-313x.1997.12010121.x</pub-id> <pub-id pub-id-type="pmid">9263455</pub-id></citation></ref>
<ref id="B80"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mistry</surname> <given-names>J.</given-names></name> <name><surname>Finn</surname> <given-names>R. D.</given-names></name> <name><surname>Eddy</surname> <given-names>S. R.</given-names></name> <name><surname>Bateman</surname> <given-names>A.</given-names></name> <name><surname>Punta</surname> <given-names>M.</given-names></name></person-group> (<year>2013</year>). <article-title>Challenges in homology search: HMMER3 and convergent evolution of coiled-coil regions.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>41</volume>:<issue>e121</issue>. <pub-id pub-id-type="doi">10.1093/nar/gkt263</pub-id> <pub-id pub-id-type="pmid">23598997</pub-id></citation></ref>
<ref id="B81"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mizuguchi</surname> <given-names>T.</given-names></name> <name><surname>Fudenberg</surname> <given-names>G.</given-names></name> <name><surname>Mehta</surname> <given-names>S.</given-names></name> <name><surname>Belton</surname> <given-names>J.-M.</given-names></name> <name><surname>Taneja</surname> <given-names>N.</given-names></name> <name><surname>Folco</surname> <given-names>H. D.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Cohesin-dependent globules and heterochromatin shape 3D genome architecture in <italic>S. pombe</italic>.</article-title> <source><italic>Nature</italic></source> <volume>516</volume> <fpage>432</fpage>&#x2013;<lpage>435</lpage>. <pub-id pub-id-type="doi">10.1038/nature13833</pub-id> <pub-id pub-id-type="pmid">25307058</pub-id></citation></ref>
<ref id="B82"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Myburg</surname> <given-names>A. A.</given-names></name> <name><surname>Grattapaglia</surname> <given-names>D.</given-names></name> <name><surname>Tuskan</surname> <given-names>G. A.</given-names></name> <name><surname>Hellsten</surname> <given-names>U.</given-names></name> <name><surname>Hayes</surname> <given-names>R. D.</given-names></name> <name><surname>Grimwood</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>The genome of <italic>Eucalyptus grandis</italic>.</article-title> <source><italic>Nature</italic></source> <volume>510</volume> <fpage>356</fpage>&#x2013;<lpage>362</lpage>.</citation></ref>
<ref id="B83"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Neumann</surname> <given-names>P.</given-names></name> <name><surname>Navr&#x00E1;tilov&#x00E1;</surname> <given-names>A.</given-names></name> <name><surname>Kobl&#x00ED;&#x017E;kov&#x00E1;</surname> <given-names>A.</given-names></name> <name><surname>Kejnovsk&#x00FD;</surname> <given-names>E.</given-names></name> <name><surname>H&#x0159;ibov&#x00E1;</surname> <given-names>E.</given-names></name> <name><surname>Hobza</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2011</year>). <article-title>Plant centromeric retrotransposons: a structural and cytogenetic perspective.</article-title> <source><italic>Mobile DNA</italic></source> <volume>2</volume>:<issue>4</issue>. <pub-id pub-id-type="doi">10.1186/1759-8753-2-4</pub-id> <pub-id pub-id-type="pmid">21371312</pub-id></citation></ref>
<ref id="B84"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Neumann</surname> <given-names>P.</given-names></name> <name><surname>Nov&#x00E1;k</surname> <given-names>P.</given-names></name> <name><surname>Ho&#x0161;t&#x00E1;kov&#x00E1;</surname> <given-names>N.</given-names></name> <name><surname>Macas</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>Systematic survey of plant LTR-retrotransposons elucidates phylogenetic relationships of their polyprotein domains and provides a reference for element classification.</article-title> <source><italic>Mobile DNA</italic></source> <volume>10</volume>:<issue>1</issue>. <pub-id pub-id-type="doi">10.1186/s13100-018-0144-1</pub-id> <pub-id pub-id-type="pmid">30622655</pub-id></citation></ref>
<ref id="B85"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>L.-T.</given-names></name> <name><surname>Schmidt</surname> <given-names>H. A.</given-names></name> <name><surname>Von Haeseler</surname> <given-names>A.</given-names></name> <name><surname>Minh</surname> <given-names>B. Q.</given-names></name></person-group> (<year>2015</year>). <article-title>IQ-TREE: A fast and effective stochastic algorithm for estimating maximum-likelihood phylogenies.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>32</volume> <fpage>268</fpage>&#x2013;<lpage>274</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msu300</pub-id> <pub-id pub-id-type="pmid">25371430</pub-id></citation></ref>
<ref id="B86"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oda</surname> <given-names>E.</given-names></name> <name><surname>Hatada</surname> <given-names>K.</given-names></name> <name><surname>Kimura</surname> <given-names>J.</given-names></name> <name><surname>Aizawa</surname> <given-names>Y.</given-names></name> <name><surname>Thanikachalam</surname> <given-names>P. V.</given-names></name> <name><surname>Watanabe</surname> <given-names>K.</given-names></name></person-group> (<year>2005</year>). <article-title>Relationships between serum unsaturated fatty acids and coronary risk factors: Negative relations between nervonic acid and obesity-related risk factors.</article-title> <source><italic>Int. Heart J.</italic></source> <volume>46</volume> <fpage>975</fpage>&#x2013;<lpage>985</lpage>. <pub-id pub-id-type="doi">10.1536/ihj.46.975</pub-id> <pub-id pub-id-type="pmid">16394593</pub-id></citation></ref>
<ref id="B87"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ou</surname> <given-names>S.</given-names></name> <name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Jiang</surname> <given-names>N.</given-names></name></person-group> (<year>2018</year>). <article-title>Assessing genome assembly quality using the LTR assembly index (LAI).</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>46</volume>:<issue>e126</issue>. <pub-id pub-id-type="doi">10.1093/nar/gky730</pub-id> <pub-id pub-id-type="pmid">30107434</pub-id></citation></ref>
<ref id="B88"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pertea</surname> <given-names>M.</given-names></name> <name><surname>Pertea</surname> <given-names>G. M.</given-names></name> <name><surname>Antonescu</surname> <given-names>C. M.</given-names></name> <name><surname>Chang</surname> <given-names>T.-C.</given-names></name> <name><surname>Mendell</surname> <given-names>J. T.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2015</year>). <article-title>StringTie enables improved reconstruction of a transcriptome from RNA-seq reads.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>33</volume> <fpage>290</fpage>&#x2013;<lpage>295</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.3122</pub-id> <pub-id pub-id-type="pmid">25690850</pub-id></citation></ref>
<ref id="B89"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pryszcz</surname> <given-names>L. P.</given-names></name> <name><surname>Gabald&#x00F3;n</surname> <given-names>T.</given-names></name></person-group> (<year>2016</year>). <article-title>Redundans: an assembly pipeline for highly heterozygous genomes.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>44</volume>:<issue>e113</issue>. <pub-id pub-id-type="doi">10.1093/nar/gkw294</pub-id> <pub-id pub-id-type="pmid">27131372</pub-id></citation></ref>
<ref id="B90"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rice</surname> <given-names>P.</given-names></name> <name><surname>Longden</surname> <given-names>I.</given-names></name> <name><surname>Bleasby</surname> <given-names>A.</given-names></name></person-group> (<year>2000</year>). <article-title>EMBOSS: The European molecular biology open software suite.</article-title> <source><italic>Trends Genet.</italic></source> <volume>16</volume> <fpage>276</fpage>&#x2013;<lpage>277</lpage>.</citation></ref>
<ref id="B91"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Round</surname> <given-names>E. K.</given-names></name> <name><surname>Flowers</surname> <given-names>S. K.</given-names></name> <name><surname>Richards</surname> <given-names>E. J.</given-names></name></person-group> (<year>1997</year>). <article-title><italic>Arabidopsis thaliana</italic> centromere regions: Genetic map positions and repetitive DNA structure.</article-title> <source><italic>Genome Res.</italic></source> <volume>7</volume> <fpage>1045</fpage>&#x2013;<lpage>1053</lpage>. <pub-id pub-id-type="doi">10.1101/gr.7.11.1045</pub-id> <pub-id pub-id-type="pmid">9371740</pub-id></citation></ref>
<ref id="B92"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ruan</surname> <given-names>C. J.</given-names></name> <name><surname>Yan</surname> <given-names>R.</given-names></name> <name><surname>Wang</surname> <given-names>B. X.</given-names></name> <name><surname>Mopper</surname> <given-names>S.</given-names></name> <name><surname>Guan</surname> <given-names>W. K.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>The importance of yellow horn (<italic>Xanthoceras sorbifolia</italic>) for restoration of arid habitats and production of bioactive seed oils.</article-title> <source><italic>Ecol. Eng.</italic></source> <volume>99</volume> <fpage>504</fpage>&#x2013;<lpage>512</lpage>. <pub-id pub-id-type="doi">10.1016/j.ecoleng.2016.11.073</pub-id></citation></ref>
<ref id="B93"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sedlazeck</surname> <given-names>F. J.</given-names></name> <name><surname>Lee</surname> <given-names>H.</given-names></name> <name><surname>Darby</surname> <given-names>C. A.</given-names></name> <name><surname>Schatz</surname> <given-names>M. C.</given-names></name></person-group> (<year>2018</year>). <article-title>Piercing the dark matter: Bioinformatics of long-range sequencing and mapping.</article-title> <source><italic>Nat. Rev. Genet.</italic></source> <volume>19</volume> <fpage>329</fpage>&#x2013;<lpage>346</lpage>. <pub-id pub-id-type="doi">10.1038/s41576-018-0003-4</pub-id> <pub-id pub-id-type="pmid">29599501</pub-id></citation></ref>
<ref id="B94"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shannon</surname> <given-names>P.</given-names></name> <name><surname>Markiel</surname> <given-names>A.</given-names></name> <name><surname>Ozier</surname> <given-names>O.</given-names></name> <name><surname>Baliga</surname> <given-names>N. S.</given-names></name> <name><surname>Wang</surname> <given-names>J. T.</given-names></name> <name><surname>Ramage</surname> <given-names>D.</given-names></name><etal/></person-group> (<year>2003</year>). <article-title>Cytoscape: A software environment for integrated models of biomolecular interaction networks.</article-title> <source><italic>Genome Res.</italic></source> <volume>13</volume> <fpage>2498</fpage>&#x2013;<lpage>2504</lpage>. <pub-id pub-id-type="doi">10.1101/gr.1239303</pub-id> <pub-id pub-id-type="pmid">14597658</pub-id></citation></ref>
<ref id="B95"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Simao</surname> <given-names>F. A.</given-names></name> <name><surname>Waterhouse</surname> <given-names>R. M.</given-names></name> <name><surname>Ioannidis</surname> <given-names>P.</given-names></name> <name><surname>Kriventseva</surname> <given-names>E. V.</given-names></name> <name><surname>Zdobnov</surname> <given-names>E. M.</given-names></name></person-group> (<year>2015</year>). <article-title>BUSCO: Assessing genome assembly and annotation completeness with single-copy orthologs.</article-title> <source><italic>Bioinformatics</italic></source> <volume>31</volume> <fpage>3210</fpage>&#x2013;<lpage>3212</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv351</pub-id> <pub-id pub-id-type="pmid">26059717</pub-id></citation></ref>
<ref id="B96"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Skinner</surname> <given-names>M. E.</given-names></name> <name><surname>Uzilov</surname> <given-names>A. V.</given-names></name> <name><surname>Stein</surname> <given-names>L. D.</given-names></name> <name><surname>Mungall</surname> <given-names>C. J.</given-names></name> <name><surname>Holmes</surname> <given-names>I. H.</given-names></name></person-group> (<year>2009</year>). <article-title>JBrowse: A next-generation genome browser.</article-title> <source><italic>Genome Res.</italic></source> <volume>19</volume> <fpage>1630</fpage>&#x2013;<lpage>1638</lpage>.</citation></ref>
<ref id="B97"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Slater</surname> <given-names>G. S.</given-names></name> <name><surname>Birney</surname> <given-names>E.</given-names></name></person-group> (<year>2005</year>). <article-title>Automated generation of heuristics for biological sequence comparison.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>6</volume>:<issue>31</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-6-31</pub-id> <pub-id pub-id-type="pmid">15713233</pub-id></citation></ref>
<ref id="B98"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Smit</surname> <given-names>A.</given-names></name> <name><surname>Hubley</surname> <given-names>R.</given-names></name></person-group> (<year>2008</year>). <source><italic>RepeatModeler Open-1.0. 2008&#x2013;2015.</italic></source> Available online at: <ext-link ext-link-type="uri" xlink:href="http://www.repeatmasker.org">http://www.repeatmasker.org</ext-link> <comment>(accessed October 01, 2019)</comment></citation></ref>
<ref id="B99"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Smit</surname> <given-names>A.</given-names></name> <name><surname>Hubley</surname> <given-names>R.</given-names></name> <name><surname>Green</surname> <given-names>P.</given-names></name></person-group> (<year>2013</year>). <source><italic>RepeatMasker Open-4.0. 2013&#x2013;2015.</italic></source> Available online at: <ext-link ext-link-type="uri" xlink:href="http://www.repeatmasker.org">http://www.repeatmasker.org</ext-link> <comment>(accessed October 01, 2019)</comment></citation></ref>
<ref id="B100"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stanke</surname> <given-names>M.</given-names></name> <name><surname>Diekhans</surname> <given-names>M.</given-names></name> <name><surname>Baertsch</surname> <given-names>R.</given-names></name> <name><surname>Haussler</surname> <given-names>D.</given-names></name></person-group> (<year>2008</year>). <article-title>Using native and syntenically mapped cDNA alignments to improve <italic>de novo</italic> gene finding.</article-title> <source><italic>Bioinformatics</italic></source> <volume>24</volume> <fpage>637</fpage>&#x2013;<lpage>644</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btn013</pub-id> <pub-id pub-id-type="pmid">18218656</pub-id></citation></ref>
<ref id="B101"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Steinbiss</surname> <given-names>S.</given-names></name> <name><surname>Willhoeft</surname> <given-names>U.</given-names></name> <name><surname>Gremme</surname> <given-names>G.</given-names></name> <name><surname>Kurtz</surname> <given-names>S.</given-names></name></person-group> (<year>2009</year>). <article-title>Fine-grained annotation and classification of <italic>de novo</italic> predicted LTR retrotransposons.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>37</volume> <fpage>7002</fpage>&#x2013;<lpage>7013</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkp759</pub-id> <pub-id pub-id-type="pmid">19786494</pub-id></citation></ref>
<ref id="B102"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sundell</surname> <given-names>D.</given-names></name> <name><surname>Mannapperuma</surname> <given-names>C.</given-names></name> <name><surname>Netotea</surname> <given-names>S.</given-names></name> <name><surname>Delhomme</surname> <given-names>N.</given-names></name> <name><surname>Lin</surname> <given-names>Y.-C.</given-names></name> <name><surname>Sj&#x00F6;din</surname> <given-names>A.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>The plant genome integrative explorer resource: PlantGenIE.org.</article-title> <source><italic>New Phytol.</italic></source> <volume>208</volume> <fpage>1149</fpage>&#x2013;<lpage>1156</lpage>. <pub-id pub-id-type="doi">10.1111/nph.13557</pub-id> <pub-id pub-id-type="pmid">26192091</pub-id></citation></ref>
<ref id="B103"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Swarbreck</surname> <given-names>D.</given-names></name> <name><surname>Wilks</surname> <given-names>C.</given-names></name> <name><surname>Lamesch</surname> <given-names>P.</given-names></name> <name><surname>Berardini</surname> <given-names>T. Z.</given-names></name> <name><surname>Garcia-Hernandez</surname> <given-names>M.</given-names></name> <name><surname>Foerster</surname> <given-names>H.</given-names></name><etal/></person-group> (<year>2007</year>). <article-title>The Arabidopsis information resource (TAIR): Gene structure and function annotation.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>36</volume> <fpage>D1009</fpage>&#x2013;<lpage>D1014</lpage>.</citation></ref>
<ref id="B104"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tillich</surname> <given-names>M.</given-names></name> <name><surname>Lehwark</surname> <given-names>P.</given-names></name> <name><surname>Pellizzer</surname> <given-names>T.</given-names></name> <name><surname>Ulbricht-Jones</surname> <given-names>E. S.</given-names></name> <name><surname>Fischer</surname> <given-names>A.</given-names></name> <name><surname>Bock</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>GeSeq &#x2013; versatile and accurate annotation of organelle genomes.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>45</volume> <fpage>W6</fpage>&#x2013;<lpage>W11</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx391</pub-id> <pub-id pub-id-type="pmid">28486635</pub-id></citation></ref>
<ref id="B105"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>VanBuren</surname> <given-names>R.</given-names></name> <name><surname>Bryant</surname> <given-names>D.</given-names></name> <name><surname>Edger</surname> <given-names>P. P.</given-names></name> <name><surname>Tang</surname> <given-names>H.</given-names></name> <name><surname>Burgess</surname> <given-names>D.</given-names></name> <name><surname>Challabathula</surname> <given-names>D.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Single-molecule sequencing of the desiccation-tolerant grass <italic>Oropetium thomaeum</italic>.</article-title> <source><italic>Nature</italic></source> <volume>527</volume> <fpage>508</fpage>&#x2013;<lpage>511</lpage>. <pub-id pub-id-type="doi">10.1038/nature15714</pub-id> <pub-id pub-id-type="pmid">26560029</pub-id></citation></ref>
<ref id="B106"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Varoquaux</surname> <given-names>N.</given-names></name> <name><surname>Liachko</surname> <given-names>I.</given-names></name> <name><surname>Ay</surname> <given-names>F.</given-names></name> <name><surname>Burton</surname> <given-names>J. N.</given-names></name> <name><surname>Shendure</surname> <given-names>J.</given-names></name> <name><surname>Dunham</surname> <given-names>M. J.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Accurate identification of centromere locations in yeast genomes using Hi-C.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>43</volume> <fpage>5331</fpage>&#x2013;<lpage>5339</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv424</pub-id> <pub-id pub-id-type="pmid">25940625</pub-id></citation></ref>
<ref id="B107"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Venegas-Caler&#x00F3;n</surname> <given-names>M.</given-names></name> <name><surname>Ru&#x00ED;z-M&#x00E9;ndez</surname> <given-names>M. V.</given-names></name> <name><surname>Mart&#x00ED;nez-Force</surname> <given-names>E.</given-names></name> <name><surname>Garc&#x00E9;s</surname> <given-names>R.</given-names></name> <name><surname>Salas</surname> <given-names>J. J.</given-names></name></person-group> (<year>2017</year>). <article-title>Characterization of <italic>Xanthoceras sorbifolium</italic> Bunge seeds: Lipids, proteins and saponins content.</article-title> <source><italic>Ind. Crops Product.</italic></source> <volume>109</volume> <fpage>192</fpage>&#x2013;<lpage>198</lpage>. <pub-id pub-id-type="doi">10.1016/j.indcrop.2017.08.022</pub-id></citation></ref>
<ref id="B108"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Walker</surname> <given-names>B. J.</given-names></name> <name><surname>Abeel</surname> <given-names>T.</given-names></name> <name><surname>Shea</surname> <given-names>T.</given-names></name> <name><surname>Priest</surname> <given-names>M.</given-names></name> <name><surname>Abouelliel</surname> <given-names>A.</given-names></name> <name><surname>Sakthikumar</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Pilon: An integrated tool for comprehensive microbial variant detection and genome assembly improvement.</article-title> <source><italic>PLoS One</italic></source> <volume>9</volume>:<issue>e112963</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0112963</pub-id> <pub-id pub-id-type="pmid">25409509</pub-id></citation></ref>
<ref id="B109"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Roqueiro</surname> <given-names>D.</given-names></name> <name><surname>Grimm</surname> <given-names>D.</given-names></name> <name><surname>Schwab</surname> <given-names>R.</given-names></name> <name><surname>Becker</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Genome-wide analysis of local chromatin packing in <italic>Arabidopsis thaliana</italic>.</article-title> <source><italic>Genome Res.</italic></source> <volume>25</volume> <fpage>246</fpage>&#x2013;<lpage>256</lpage>. <pub-id pub-id-type="doi">10.1101/gr.170332.113</pub-id> <pub-id pub-id-type="pmid">25367294</pub-id></citation></ref>
<ref id="B110"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Zhu</surname> <given-names>J.</given-names></name> <name><surname>Yu</surname> <given-names>J.</given-names></name></person-group> (<year>2010</year>). <article-title>KaKs_Calculator 2.0: A toolkit incorporating gamma-series methods and sliding window strategies.</article-title> <source><italic>Genom. Proteom. Bioinform.</italic></source> <volume>8</volume> <fpage>77</fpage>&#x2013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1016/S1672-0229(10)60008-3</pub-id></citation></ref>
<ref id="B111"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Q.</given-names></name> <name><surname>Yang</surname> <given-names>L.</given-names></name> <name><surname>Ranjitkar</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>J.-J.</given-names></name> <name><surname>Wang</surname> <given-names>X.-R.</given-names></name> <name><surname>Zhang</surname> <given-names>D.-X.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Distribution and in situ conservation of a relic Chinese oil woody species <italic>Xanthoceras sorbifolium</italic> (yellowhorn).</article-title> <source><italic>Can. J. For. Res.</italic></source> <volume>47</volume> <fpage>1450</fpage>&#x2013;<lpage>1456</lpage>. <pub-id pub-id-type="doi">10.1139/cjfr-2017-0210</pub-id> <pub-id pub-id-type="pmid">33356898</pub-id></citation></ref>
<ref id="B112"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Xu</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>S.</given-names></name> <name><surname>Cao</surname> <given-names>L.</given-names></name> <name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Cheng</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Genomic analyses of primitive, wild and cultivated citrus provide insights into asexual reproduction.</article-title> <source><italic>Nat. Genet.</italic></source> <volume>49</volume> <fpage>765</fpage>&#x2013;<lpage>772</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3839</pub-id> <pub-id pub-id-type="pmid">28394353</pub-id></citation></ref>
<ref id="B113"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Tang</surname> <given-names>H.</given-names></name> <name><surname>Debarry</surname> <given-names>J. D.</given-names></name> <name><surname>Tan</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>MCScanX: A toolkit for detection and evolutionary analysis of gene synteny and collinearity.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>40</volume>:<issue>e49</issue>. <pub-id pub-id-type="doi">10.1093/nar/gkr1293</pub-id> <pub-id pub-id-type="pmid">22217600</pub-id></citation></ref>
<ref id="B114"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>C.-Q.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Zhou</surname> <given-names>S.-S.</given-names></name> <name><surname>Zhang</surname> <given-names>D.-X.</given-names></name> <name><surname>Zhao</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Genome sequence of <italic>Malania oleifera</italic>, a tree with great value for nervonic acid production.</article-title> <source><italic>GigaScience</italic></source> <volume>8</volume>:<issue>giy164</issue>.</citation></ref>
<ref id="B115"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>L.</given-names></name> <name><surname>Bennetzen</surname> <given-names>J. L.</given-names></name></person-group> (<year>2009</year>). <article-title>Distribution, diversity, evolution, and survival of Helitrons in the maize genome.</article-title> <source><italic>Proc. Natl. Acad. Sci. U.S.A.</italic></source> <volume>106</volume> <fpage>19922</fpage>&#x2013;<lpage>19927</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0908008106</pub-id> <pub-id pub-id-type="pmid">19926865</pub-id></citation></ref>
<ref id="B116"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>X.</given-names></name> <name><surname>Hu</surname> <given-names>R.</given-names></name> <name><surname>Yin</surname> <given-names>H.</given-names></name> <name><surname>Jenkins</surname> <given-names>J.</given-names></name> <name><surname>Shu</surname> <given-names>S.</given-names></name> <name><surname>Tang</surname> <given-names>H.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>The <italic>Kalancho&#x00EB;</italic> genome provides insights into convergent evolution and building blocks of crassulacean acid metabolism.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>8</volume>:<issue>1899</issue>. <pub-id pub-id-type="doi">10.1038/s41467-017-01491-7</pub-id> <pub-id pub-id-type="pmid">29196618</pub-id></citation></ref>
<ref id="B117"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z.</given-names></name></person-group> (<year>2007</year>). <article-title>PAML 4: Phylogenetic analysis by maximum likelihood.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>24</volume> <fpage>1586</fpage>&#x2013;<lpage>1591</lpage>.</citation></ref>
<ref id="B118"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Ge</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Jin</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>L.</given-names></name> <name><surname>Hu</surname> <given-names>W.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Cotton D genome assemblies built with long-read data unveil mechanisms of centromere evolution and stress tolerance divergence.</article-title> <source><italic>BMC Biol.</italic></source> <volume>19</volume>:<issue>115</issue>. <pub-id pub-id-type="doi">10.1186/s12915-021-01041-0</pub-id> <pub-id pub-id-type="pmid">34082735</pub-id></citation></ref>
<ref id="B119"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yao</surname> <given-names>Z.</given-names></name> <name><surname>Qi</surname> <given-names>J.</given-names></name> <name><surname>Yin</surname> <given-names>L.</given-names></name></person-group> (<year>2013</year>). <article-title>Biodiesel production from <italic>Xanthoceras sorbifolia</italic> in China: Opportunities and challenges.</article-title> <source><italic>Renew. Sustain. Energy Rev.</italic></source> <volume>24</volume> <fpage>57</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1016/j.rser.2013.03.047</pub-id></citation></ref>
<ref id="B120"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>G.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Han</surname> <given-names>Y.</given-names></name> <name><surname>He</surname> <given-names>Q.</given-names></name></person-group> (<year>2012</year>). <article-title>ClusterProfiler: An R package for comparing biological themes among gene clusters.</article-title> <source><italic>OMICS J. Integr. Biol.</italic></source> <volume>16</volume> <fpage>284</fpage>&#x2013;<lpage>287</lpage>. <pub-id pub-id-type="doi">10.1089/omi.2011.0118</pub-id> <pub-id pub-id-type="pmid">22455463</pub-id></citation></ref>
<ref id="B121"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>H.</given-names></name> <name><surname>Fan</surname> <given-names>S.</given-names></name> <name><surname>Bi</surname> <given-names>Q.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Hu</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Seed morphology, oil content and fatty acid composition variability assessment in yellow horn (<italic>Xanthoceras sorbifolium</italic> Bunge) germplasm for optimum biodiesel production.</article-title> <source><italic>Ind. Crops Product.</italic></source> <volume>97</volume> <fpage>425</fpage>&#x2013;<lpage>430</lpage>.</citation></ref>
<ref id="B122"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Carriero</surname> <given-names>N.</given-names></name> <name><surname>Zheng</surname> <given-names>D.</given-names></name> <name><surname>Karro</surname> <given-names>J.</given-names></name> <name><surname>Harrison</surname> <given-names>P. M.</given-names></name> <name><surname>Gerstein</surname> <given-names>M.</given-names></name></person-group> (<year>2006</year>). <article-title>PseudoPipe: An automated pseudogene identification pipeline.</article-title> <source><italic>Bioinformatics</italic></source> <volume>22</volume> <fpage>1437</fpage>&#x2013;<lpage>1439</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btl116</pub-id> <pub-id pub-id-type="pmid">16574694</pub-id></citation></ref>
<ref id="B123"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhong</surname> <given-names>C. X.</given-names></name> <name><surname>Marshall</surname> <given-names>J. B.</given-names></name> <name><surname>Topp</surname> <given-names>C.</given-names></name> <name><surname>Mroczek</surname> <given-names>R.</given-names></name> <name><surname>Kato</surname> <given-names>A.</given-names></name> <name><surname>Nagaki</surname> <given-names>K.</given-names></name><etal/></person-group> (<year>2002</year>). <article-title>Centromeric retroelements and satellites interact with maize kinetochore protein CENH3.</article-title> <source><italic>Plant Cell</italic></source> <volume>14</volume> <fpage>2825</fpage>&#x2013;<lpage>2836</lpage>. <pub-id pub-id-type="doi">10.1105/tpc.006106</pub-id> <pub-id pub-id-type="pmid">12417704</pub-id></citation></ref>
</ref-list>
<fn-group>
<fn id="footnote1">
<label>1</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.pacb.com/">https://www.pacb.com/</ext-link></p></fn>
<fn id="footnote2">
<label>2</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/">https://www.ncbi.nlm.nih.gov/</ext-link></p></fn>
<fn id="footnote3">
<label>3</label>
<p><ext-link ext-link-type="uri" xlink:href="https://pmn.plantcyc.org/organism-summary?object=PLANT">https://pmn.plantcyc.org/organism-summary?object=PLANT</ext-link></p></fn>
<fn id="footnote4">
<label>4</label>
<p><ext-link ext-link-type="uri" xlink:href="https://bitbucket.org/nina_h/profrep/wiki/Home">https://bitbucket.org/nina_h/profrep/wiki/Home</ext-link></p></fn>
</fn-group>
</back>
</article>
