<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2023.1248780</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Chromosome-scale assemblies of <italic>S. malaccense, S. aqueum, S. jambos</italic>, and <italic>S. syzygioides</italic> provide insights into the evolution of <italic>Syzygium</italic> genomes</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Ouadi</surname>
<given-names>Sonia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1228126"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sierro</surname>
<given-names>Nicolas</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1951513"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kessler</surname>
<given-names>Felix</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/65691"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Ivanov</surname>
<given-names>Nikolai V.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/899070"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Faculty of Sciences, Laboratory of Plant Physiology, University of Neuch&#xe2;tel</institution>, <addr-line>Neuch&#xe2;tel</addr-line>, <country>Switzerland</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Philip Morris International R&amp;D, Philip Morris Products S.A.</institution>, <addr-line>Neuch&#xe2;tel</addr-line>, <country>Switzerland</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Agnieszka Zmienko, Polish Academy of Sciences, Poland</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Pawel Wojciechowski, Pozna&#x144; University of Technology, Poland; Xiaojun Nie, Northwest A&amp;F University, China; Jian-Feng Mao, Beijing Forestry University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Nikolai V. Ivanov, <email xlink:href="mailto:nikolai.ivanov@unine.ch">nikolai.ivanov@unine.ch</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>06</day>
<month>10</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1248780</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>06</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>08</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Ouadi, Sierro, Kessler and Ivanov</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Ouadi, Sierro, Kessler and Ivanov</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<italic>Syzygium</italic> is a large and diverse tree genus in the Myrtaceae family. Genome assemblies for clove (<italic>Syzygium aromaticum</italic>, 370 Mb) and sea apple (<italic>Syzygium grande</italic>, 405 Mb) provided the first insights into the genomic features and evolution of the <italic>Syzygium</italic> genus. Here, we present additional <italic>de novo</italic> chromosome-scale genome assemblies for <italic>Syzygium malaccense, Syzygium aqueum, Syzygium jambos</italic>, and <italic>Syzygium syzygioides.</italic> Genome profiling analyses show that <italic>S. malaccense</italic>, like <italic>S. aromaticum</italic> and <italic>S. grande</italic>, is diploid (2n = 2x = 22), while the <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic> specimens are autotetraploid (2n = 4x = 44). The genome assemblies of <italic>S. malaccense</italic> (430 Mb), <italic>S. aqueum</italic> (392 Mb), <italic>S. jambos</italic> (426 Mb), and <italic>S. syzygioides</italic> (431 Mb) are highly complete (BUSCO scores of 98%). Comparative genomics analyses showed conserved organization of the 11 chromosomes with <italic>S. aromaticum</italic> and <italic>S. grande</italic>, and revealed species-specific evolutionary dynamics of the long terminal repeat retrotransposon elements belonging to the Gypsy and Copia lineages. This set of <italic>Syzygium</italic> genomes is a valuable resource for future structural and functional comparative genomic studies on Myrtaceae species.</p>
</abstract>
<kwd-group>
<kwd>
<italic>Syzygium</italic>
</kwd>
<kwd>Myrtaceae</kwd>
<kwd>
<italic>de novo</italic> assembly</kwd>
<kwd>comparative genomics</kwd>
<kwd>synteny</kwd>
<kwd>long terminal repeat retrotransposons</kwd>
</kwd-group>
<contract-sponsor id="cn001">Philip Morris International<named-content content-type="fundref-id">10.13039/100014729</named-content>
</contract-sponsor>
<counts>
<fig-count count="7"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="73"/>
<page-count count="15"/>
<word-count count="7436"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Plant Systems and Synthetic Biology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>
<italic>Syzygium</italic> is the largest tree genus with about 1,200 species naturally occurring from the Old World tropics and subtropics to the Pacific (<xref ref-type="bibr" rid="B50">POWO, 2023</xref>; <xref ref-type="bibr" rid="B9">Craven and Biffin, 2010</xref>; <xref ref-type="bibr" rid="B2">Beech et&#xa0;al., 2017</xref>). In addition to their ecological importance, the genus includes several species grown for their edible fruit, medicinal properties, timber, and for the horticulture industry (e.g., <italic>S. malaccense, S. aqueum, S. jambos</italic>, and <italic>S. cumini</italic>), the most important economically being the clove tree (<italic>S. aromaticum)</italic> (<xref ref-type="bibr" rid="B46">Parnell et&#xa0;al., 2007</xref>; <xref ref-type="bibr" rid="B41">Nurdjannah and Bermawie, 2012</xref>; <xref ref-type="bibr" rid="B38">Nair, 2017</xref>; <xref ref-type="bibr" rid="B8">Cock and Cheesman, 2018</xref>).</p>
<p>The <italic>Syzygium</italic> genus belongs to the Myrtaceae family&#x2014;the eighth largest family of flowering plants&#x2014;and includes economically important species such as eucalyptus, myrtle, and guava (<xref ref-type="bibr" rid="B19">Grattapaglia et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B7">Christenhusz and Byng, 2016</xref>; <xref ref-type="bibr" rid="B55">Saber et&#xa0;al., 2023</xref>). Although the majority of species of the Myrtaceae family are diploids (2n = 22) with small to intermediate genome sizes (234&#x2013;1785 Mb), occasional polyploids derived from the most conserved chromosome number x = 11 were also reported (e.g., within the <italic>Eugenia, Syzygium</italic>, and <italic>Psidium</italic> genera) (<xref ref-type="bibr" rid="B69">Wilson, 2010</xref>; <xref ref-type="bibr" rid="B19">Grattapaglia et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B63">Tuler et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B48">Pellicer and Leitch, 2020</xref>; <xref ref-type="bibr" rid="B34">Machado and Forni-Martins, 2022</xref>). The <italic>Eucalyptus grandis</italic> genome was released in 2014 as the first reference genome for the Myrtales order and the Myrtaceae family (<xref ref-type="bibr" rid="B37">Myburg et&#xa0;al., 2014</xref>). New chromosome-scale assemblies were subsequently published, enabling comparative genomics analyses within the family. Published chromosome-scale genome assemblies for the Myrtaceae currently represent major tribes of the family: Eucalypteae (<italic>Eucalyptus grandis, Corymbia citriodora</italic>, <italic>Eucalyptus urophylla &#xd7; Eucalyptus grandis</italic>), Leptospermeae (<italic>Leptospermum scoparium</italic>), Myrteae (<italic>Psidium guajava</italic>, <italic>Rhodomyrtus tomentosa</italic>), Metrosidereae (<italic>Metrosideros polymorpha</italic>), Melaleuceae (<italic>Melaleuca alternifolia</italic>), and Syzygieae (<italic>S. aromaticum, Syzygium grande</italic>). 
These assemblies were generated from diploid specimens, and their size ranged from 297 Mb to 690 Mb (<xref ref-type="bibr" rid="B37">Myburg et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B24">Izuno et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B62">Thrimawithana et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B13">Feng et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B21">Healey et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B33">Low et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B71">Zheng et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B31">Li et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B58">Shen et&#xa0;al., 2023</xref>).</p>
<p>The clove (<italic>S. aromaticum</italic> (L.) Merr. &amp; L.M. Perry) and sea apple (<italic>S. grande</italic>) genomes were constructed using a combination of Oxford Nanopore Technologies long-reads and Illumina short-reads and anchored on 11 chromosomes using Hi-C technologies (<xref ref-type="bibr" rid="B33">Low et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>). The sea apple genome assembly (405 Mb), 182 re-sequenced <italic>Syzygium</italic> species and 58 re-sequenced unidentified taxa were used to generate whole genome-level phylogenies of the <italic>Syzygium</italic> genus, thus providing new insights into the infrageneric classification of <italic>Syzygium</italic>, as well as into the genus diversification patterns and their drivers. The clove genome assembly (370 Mb) was exploited to investigate the genetic basis of the biosynthesis of eugenol, the major biocompound of clove products (<xref ref-type="bibr" rid="B25">Kamatou et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B43">Otunola, 2022</xref>). To provide insights into the clove genome evolution, comparative genomics analyses were also performed between <italic>S. aromaticum</italic> and <italic>E. grandis</italic>. The synteny analysis performed between these two Myrtaceae species&#x2019; genomes assemblies revealed good genome structure conservation. The structures of chromosomes 1, 3, 5, and 7 were found to be highly conserved between <italic>E. grandis</italic> and <italic>S. aromaticum</italic>, and 10 intrachromosomal rearrangements occurring on the 7 other chromosomes were observed (chromosomes 2, 4, 6, 8, 9, 10, and 11). Interestingly, the intrachromosomal rearrangements detected between the two eucalypt species, <italic>E. grandis</italic> and <italic>C. 
citriodora</italic>, were located on the same seven chromosomes (<xref ref-type="bibr" rid="B4">Butler et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B21">Healey et&#xa0;al., 2021</xref>). Long terminal repeat retrotransposons (LTR-RTs) are transposable elements (TEs) that move through the genome via a copy-and-paste mechanism using an RNA intermediate. They are considered the most abundant TE component in plant genomes and important drivers of genome size variation and diversification (<xref ref-type="bibr" rid="B68">Wicker et&#xa0;al., 2007</xref>; <xref ref-type="bibr" rid="B73">Zhou et&#xa0;al., 2021</xref>). Comparing the LTR-RTs repertoires of <italic>S. aromaticum</italic> and <italic>E. grandis</italic> revealed a differential accumulation of the LTR-RTs belonging to the superfamilies Copia and Gypsy between the two species. In <italic>S. aromaticum</italic> genome assembly, the LTR-RTs belonging to the Gypsy superfamily were more abundant than those belonging to the Copia superfamily. In contrast, a higher number of LTR-RTs Copia versus Gypsy was found in the <italic>E. grandis</italic> genome assembly.</p>
<p>No infrageneric comparison of chromosome-scale assemblies has been performed for the <italic>Syzygium</italic> genus. To further investigate the evolution of the genome architecture of <italic>Syzygium</italic> species and verify whether the rearrangements found between <italic>S. aromaticum</italic> and <italic>E. grandis</italic> chromosomes were the consequences of evolutionary events or due to sequencing and assembly artifacts, we generated additional chromosome-scale genome assemblies for <italic>Syzygium malaccense</italic> (L.) Merr. &amp; L.M. Perry, <italic>Syzygium aqueum</italic> (Burm.f.) Alston, <italic>Syzygium jambos</italic> (L.) Alston, and <italic>Syzygium syzygioides</italic> (Miq.) Merr. &amp; L.M. Perry. Like <italic>S. aromaticum</italic> and <italic>S. grande</italic>, the four species belong to the subgenus <italic>Syzygium</italic>, the largest of the five <italic>Syzygium</italic> subgenera for which the crown age was estimated at 9.4 Mya by Low et&#xa0;al. (<xref ref-type="bibr" rid="B33">Low et&#xa0;al., 2022</xref>). Previous karyotype studies indicated that <italic>S. malaccense</italic> is a diploid with 2n = 22 chromosomes (<xref ref-type="bibr" rid="B47">Pedrosa et&#xa0;al., 1999</xref>) and that <italic>S. jambos</italic> is a tetraploid (2n = 44); however, different chromosome numbers were also reported in the literature for the species (2n = 28, 33, 46, ~54, 66) (<xref ref-type="bibr" rid="B65">Van Lingen, 1991</xref>; <xref ref-type="bibr" rid="B42">Oginuma et&#xa0;al., 1993</xref>). The chromosomal numbers reported in the literature indicate that <italic>S. aqueum</italic> is also a tetraploid (2n = 44) (<xref ref-type="bibr" rid="B45">Panggabean, 1991</xref>).</p>
<p>Here, we describe the <italic>de novo</italic> assembly and annotation for <italic>S. malaccense, S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides.</italic> To enable subsequent comparative genomic analyses, a monoploid consensus genome (11 chromosomes and unplaced sequences) was generated for each of the four species, so that the four assemblies reach the same level of quality, comparable to that of published chromosome-scale assemblies of their Myrtaceae relatives. Then, we compared the genome architecture of the four newly assembled <italic>Syzygium</italic> genomes with those of <italic>S. aromaticum</italic> and <italic>S. grande</italic> and their genome features (gene sets and LTR-RTs repertoires) with those of <italic>S. aromaticum</italic> to investigate genomic evolution from their common ancestors.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Biological materials</title>
<p>The <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic> genome assemblies were generated from trees growing in the Masoala Hall of the Zurich Zoo in Switzerland. Voucher specimens were deposited in the Z&#xfc;rich herbarium (<italic>S. malaccense</italic> (ZT-00170996), <italic>S. aqueum</italic> (ZT-00170994), <italic>S. jambos</italic> (ZT-00170999), and <italic>S. syzygioides</italic> (ZT-00170991)). Samples collected from the trees were stored at -80&#xb0;C until nucleic acid extraction.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>DNA and RNA isolation</title>
<p>High-molecular-weight genomic DNA was isolated from frozen leaves using the &#x201c;ONT high-molecular-weight gDNA extraction from plant leaves&#x201d; protocol (Oxford Nanopore Technologies, Oxford, UK). Following the extraction, we performed a size selection step using the Circulomics Nanobind Plant Nuclei Big DNA Kit (NB-900-801-001) from PacBio (Menlo Park, CA, USA).</p>
<p>Total RNA from <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic> was isolated in triplicate from whole leaves (young and mature), lamina of mature leaves, and stems. Total RNA was also isolated in triplicate from <italic>S. syzygioides</italic>&#x2019; buds (in the fruiting stage) and <italic>S. jambos</italic>&#x2019; buds (before and after flowering) and flowers.</p>
<p>Total RNA was extracted from frozen powder using Ambion PureLink Plant RNA Reagent (Ambion by Life Technologies, Carlsbad, CA, USA). The concentration and quality of the total RNA were assessed with an Agilent Bioanalyzer using the Agilent RNA 6000 Nano Kit (Agilent, Santa Clara, CA, USA).</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Illumina sequencing library preparation and sequencing</title>
<p>DNAseq libraries were prepared from total gDNA using the Celero PCR workflow with an enzymatic fragmentation kit from Tecan (M&#xe4;nnedorf, Switzerland). DNAseq libraries were loaded on an Illumina S2 flow cell and sequenced on the Illumina Novaseq 6000 instrument (Illumina, San Diego, CA, USA) as 2 x 151 bp paired-end reads.</p>
<p>Hi-C libraries were prepared from 0.2&#xa0;g of frozen leaves using the Proximo Hi-C Kit following the manufacturer&#x2019;s instructions (Phase Genomics, Seattle, WA, USA) and sequenced on an Illumina HiSeq 4000 instrument (Illumina) as 2 x 151 bp paired-end reads.</p>
<p>mRNA stranded libraries were prepared from 500 ng of total RNA using the Tecan Universal Plus mRNA-Seq library preparation kit with NuQuant<sup>&#xae;</sup> and sequenced on an Illumina HiSeq 4000 instrument as 2 x 151 bp paired-end reads.</p>
<p>Illumina raw reads generated from DNAseq libraries and Hi-C libraries were cleaned using fastp 0.23.2 (--length_required 75 --low_complexity_filter) (<xref ref-type="bibr" rid="B5">Chen et&#xa0;al., 2018</xref>; <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41597-021-00968-x">https://doi.org/10.1038/s41597-021-00968-x</ext-link>).</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>ONT sequencing library preparation and sequencing</title>
<p>Sequencing libraries were generated from high-molecular-weight gDNA and prepared for sequencing on PromethION flow cells (FLO-R0002) by using the ligation sequencing (SQK-LSK109) and flow cell priming (EXP-FLP002) kits (Oxford Nanopore Technologies, Oxford, UK). The base calling was performed by using Guppy 6.1.1 and the super accuracy plant model. Raw ONT reads were cleaned using seqkit 2.2.0 (--min-qual 9 --min-len 5000) (<xref ref-type="bibr" rid="B57">Shen et&#xa0;al., 2016</xref>) to discard reads shorter than 5,000 bp or with quality scores lower than 9.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Genome profiling</title>
<p>Cleaned Illumina paired-end reads from DNAseq libraries were analyzed by GenomeScope 2.0 and smudgeplot 0.2.4 to estimate the genome size, percentage of heterozygosity, and the ploidy level using a k-mer size equal to 21 (<xref ref-type="bibr" rid="B53">Ranallo-Benavidez et&#xa0;al., 2020</xref>).</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>
<italic>De Novo</italic> genome assembly</title>
<p>ONT cleaned reads were corrected with fmlrc2 0.1.7 (--cache_size 13 --K 21 59 79) (<xref ref-type="bibr" rid="B35">Mak et&#xa0;al., 2023</xref>) using cleaned Illumina paired-end short-reads from DNAseq libraries. The corrected ONT reads were then assembled using flye 2.9 (--read-error 0.01 --nano-hq) (<xref ref-type="bibr" rid="B26">Kolmogorov et&#xa0;al., 2019</xref>) and iteratively polished with ntedit 1.3.5 (-m 2 -i 3 -d 3 -X 0.5 -Y 0.5) using kmer profiles created with nthits 0.0.1 (--solid --outbloom -b 36) for kmers of lengths 60, 50, 40 and 30 (<xref ref-type="bibr" rid="B67">Warren et&#xa0;al., 2019</xref>) using Illumina paired-end short reads from DNAseq libraries. Haplotigs were detected and removed from the polished contigs using purge_dups 1.2.5 (<xref ref-type="bibr" rid="B20">Guan et&#xa0;al., 2020</xref>) with cutoffs of 10, 315, and 645 for <italic>S. malaccense</italic>; 70, 440, and 960 for <italic>S. aqueum</italic>; 60, 410, and 960 for <italic>S. jambos</italic>; and 10, 410, and 960 for <italic>S. syzygioides</italic>.</p>
<p>Cleaned Illumina read pairs generated from Hi-C libraries were mapped to the genomes to remove reads with low mapping scores, duplicated reads, and paired-end reads. Illumina Hi-C read pairs were mapped to the haplotig-purged contigs using minimap2 2.24 (<xref ref-type="bibr" rid="B30">Li, 2018</xref>) rather than bwa (<xref ref-type="bibr" rid="B29">Li, 2013</xref>) since we noticed that it results in assemblies of equivalent qualities in a shorter time. The scaffolding to a chromosome-scale assembly was performed using yahs 1.1a2 (-r 1000,2000,5000,10000,20000,50000,100000,200000,500000,1000000,2000000,5000000) (<xref ref-type="bibr" rid="B72">Zhou et&#xa0;al., 2022</xref>). Hi-C map files were generated with PretextMap 0.1.9 (<ext-link ext-link-type="uri" xlink:href="https://github.com/wtsi-hpag/PretextMap">https://github.com/wtsi-hpag/PretextMap</ext-link>) and used to manually curate the assemblies using PretextView 0.2.5 (<ext-link ext-link-type="uri" xlink:href="https://github.com/wtsi-hpag/PretextView">https://github.com/wtsi-hpag/PretextView</ext-link>).</p>
<p>The curated genome assemblies were mapped to the <italic>S. aromaticum</italic> genome (<xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>) using minimap2 2.24, visualized using a custom R script, and the orientation and names of the chromosomes were set in accordance with those of <italic>S. aromaticum</italic>. Chromosome-scale assembly completeness was assessed by using the genome evaluation mode of BUSCO 5.4.4 and the eudicots_odb10 lineage dataset (<xref ref-type="bibr" rid="B60">Sim&#xe3;o et&#xa0;al., 2015</xref>). The QVs of the final assemblies were estimated using yak 0.1 (qv -K 2000000000) with kmer profiles created using yak 0.1 (count -k 31 -K 2000000000 -b37) (<xref ref-type="bibr" rid="B6">Cheng et&#xa0;al., 2021</xref>).</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Gene annotation</title>
<p>The Illumina RNAseq reads from <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, <italic>S. jambos</italic> and <italic>S. syzygioides</italic> as well as those used for the clove genome annotation were cleaned, and overlapping paired-reads were merged using fastp 0.23.2 (--length_required 75 --low_complexity_filter --merge) (<xref ref-type="bibr" rid="B5">Chen et&#xa0;al., 2018</xref>) before being mapped as single cDNA reads to the assemblies using minimap2 2.24 (-ax splice:hq -G5K -N50) (<xref ref-type="bibr" rid="B30">Li, 2018</xref>). Gene models were then created for each RNASeq sample using scallop 0.10.5 (--min_transcript_coverage 5 --min_single_exon_coverage 50 --min_splice_bundary_hits 5 --min_mapping_quality 0) (<xref ref-type="bibr" rid="B56">Shao and Kingsford, 2017</xref>). This approach was used for the annotation of the clove genome, where it was observed to produce better gene models than by directly mapping paired-reads with a dedicated mapper.</p>
<p>To obtain models for genes that are not expressed in the RNAseq samples, the transcripts from <italic>S. aromaticum</italic> and <italic>E. grandis</italic> gene annotations were mapped to the assemblies using minimap2 2.24 (-ax splice:hq -I5G -G5K -N50 -uf) (<xref ref-type="bibr" rid="B30">Li, 2018</xref>), and gene models created using bedtools 2.30.0 (bamtobed -bed12) (<xref ref-type="bibr" rid="B52">Quinlan and Hall, 2010</xref>) and custom gawk scripts to convert the obtained bed file into a gtf file.</p>
<p>The final gene models were obtained by merging the RNAseq, <italic>S. aromaticum</italic>, and <italic>E. grandis</italic> gene models using taco 0.7.3 (--gtf-expr-attr TPM --filter-min-expr 10) (<xref ref-type="bibr" rid="B40">Niknafs et&#xa0;al., 2017</xref>) and adding coding sequences using Transdecoder 5.5.0 (LongOrfs -S -m 64; Predict --single_best_only --retain_blastp_hits dmd.tsv) (<ext-link ext-link-type="uri" xlink:href="https://github.com/TransDecoder/TransDecoder/wiki">https://github.com/TransDecoder/TransDecoder/wiki</ext-link>), diamond 2.0.15 (blastp --query longest_orfs.pep --db uniref-malvids.dmnd --max-target-seqs 1 --outfmt 6 --evalue 1e-6) (<xref ref-type="bibr" rid="B3">Buchfink et&#xa0;al., 2015</xref>) and gffread 0.12.7 (<xref ref-type="bibr" rid="B49">Pertea and Pertea, 2020</xref>).</p>
<p>The eudicotyledons portion of UniProt filtered to remove proteins with poor descriptions was used to annotate the gene models with their best hit using diamond 2.0.15 (blastx --query tx.fa --db eudicotyledons.filtered.dmnd --top 10 --min-score 200 --ultra-sensitive --iterate). The illustration of the regions where genes encoding for putative eugenol synthase were predicted was generated using gggenes 0.4.0 (<ext-link ext-link-type="uri" xlink:href="https://github.com/wilkox/gggenes">https://github.com/wilkox/gggenes</ext-link>).</p>
</sec>
<sec id="s2_8">
<label>2.8</label>
<title>Repeat annotation</title>
<p>Annotation of transposable elements was carried out using TE-greedy-nester 1.0.0 (--discovery_tool LTRharvest) (<xref ref-type="bibr" rid="B28">Lexa et&#xa0;al., 2020</xref>), genometools LTRharvest 1.6.2 (<xref ref-type="bibr" rid="B11">Ellinghaus et&#xa0;al., 2008</xref>) and TEsorter 1.3.0 (-db rexdb-plant --min-coverage 10 --max-evalue 0.01 --pass2-rule 70-30-80) (<xref ref-type="bibr" rid="B70">Zhang et&#xa0;al., 2022</xref>) with REXdb (<xref ref-type="bibr" rid="B39">Neumann et&#xa0;al., 2019</xref>). The insertion age of the predicted transposable elements was then calculated as previously reported (<xref ref-type="bibr" rid="B36">Marcon et&#xa0;al., 2015</xref>). In addition, Red 2.0 (<xref ref-type="bibr" rid="B16">Girgis, 2015</xref>), GRF 1.0 (<xref ref-type="bibr" rid="B59">Shi and Liang, 2019</xref>) and cd-hit 4.8.1 (grf-main -i genome.fa -c 1 -o genome.MITE --min_tr 10; cd-hit-est -i genome.MITE/candidate.fasta -o genome.MITE/clusteredCandidate.fasta -c 0.90 -n 5 -d 0 -aL 0.99 -s 0.8 -M 0; grf-mite-cluster -i genome.MITE/clusteredCandidate.fasta.clstr -g genome.fa -o genome.MITE) (<xref ref-type="bibr" rid="B15">Fu et&#xa0;al., 2012</xref>), EAHelitron (<xref ref-type="bibr" rid="B22">Hu et&#xa0;al., 2019</xref>), and tantan 39 (-f4) (<xref ref-type="bibr" rid="B14">Frith, 2011</xref>) were used to predict repeats, Miniature Inverted-repeat Transposable Elements (MITEs), helitron, and tandem repeats, respectively.</p>
</sec>
<sec id="s2_9">
<label>2.9</label>
<title>Synteny analyses</title>
<p>Synteny between the <italic>Syzygium</italic> species was assessed by pairwise mapping of whole genomes using minimap2 2.24 (<xref ref-type="bibr" rid="B30">Li, 2018</xref>), identifying structural variants using syri 1.6 (<xref ref-type="bibr" rid="B18">Goel et&#xa0;al., 2019</xref>), and plotting syntenic blocks larger than 20 kb using plotsr 0.5.4 (<xref ref-type="bibr" rid="B17">Goel and Schneeberger, 2022</xref>).</p>
</sec>
<sec id="s2_10">
<label>2.10</label>
<title>Orthologue analyses</title>
<p>Orthologous genes were clustered into HOGs with OrthoFinder 2.5.4 (<xref ref-type="bibr" rid="B12">Emms and Kelly, 2019</xref>) using the set of predicted protein sequences from the five species assemblies.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Genome profiling</title>
<p>Smudgeplot and GenomeScope 2.0 were used to perform a genome profiling step using Illumina PE short-reads from DNAseq libraries as input and a K-mer length of 21 bp (<xref ref-type="bibr" rid="B53">Ranallo-Benavidez et&#xa0;al., 2020</xref>) (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figures&#xa0;1</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>2</bold>
</xref>). The ploidy level predicted by Smudgeplot was in accordance with previous karyotype studies for the studied <italic>S. malaccense</italic> and <italic>S. jambos</italic> specimens (<xref ref-type="bibr" rid="B42">Oginuma et&#xa0;al., 1993</xref>; <xref ref-type="bibr" rid="B47">Pedrosa et&#xa0;al., 1999</xref>). <italic>S. malaccense</italic> was predicted to be a diploid specimen (2n = 2x = 22) like <italic>S. aromaticum</italic> and <italic>S. grande</italic>. The <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic> specimens were predicted as being autotetraploid (2n = 4x = 44). The estimated monoploid genome sizes were similar among the four <italic>Syzygium</italic> species (343&#x2013;372 Mb), a size range consistent with the small genome assembly sizes of <italic>S. aromaticum</italic> (370 Mb) and <italic>S. grande</italic> (405 Mb) (<xref ref-type="bibr" rid="B33">Low et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>). The heterozygosity rate estimated by GenomeScope 2.0 ranged from 2.3% for the diploid specimen <italic>S. malaccense</italic> to 4.3% for the autotetraploid specimen <italic>S. aqueum.</italic> These heterozygosity rates appeared to be higher than for <italic>S. aromaticum</italic> (0.18%) (<xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>) and the average reported by Ellestad et&#xa0;al., who performed a literature review of the genome-wide heterozygosity values estimated using the software GenomeScope and GenomeScope 2.0 (<xref ref-type="bibr" rid="B10">Ellestad et&#xa0;al., 2022</xref>). They found that the average value inferred for all plant species assessed was 1.59% (1.10% for diploid plants only) noting that over half of the plant species considered were cultivated for human usage, which could affect the average value accuracy.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Genome profiling summary.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="middle" align="center">
<italic>S. aromaticum</italic> (<xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>)</th>
<th valign="middle" align="center">
<italic>S. malaccense</italic>
</th>
<th valign="middle" align="center">
<italic>S. aqueum</italic>
</th>
<th valign="middle" align="center">
<italic>S. jambos</italic>
</th>
<th valign="middle" align="center">
<italic>S. syzygioides</italic>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="right">Predicted ploidy</td>
<td valign="top" align="center">2n = 2x = 22</td>
<td valign="top" align="center">2n = 2x = 22</td>
<td valign="top" align="center">2n = 4x = 44</td>
<td valign="top" align="center">2n = 4x = 44</td>
<td valign="top" align="center">2n = 4x = 44</td>
</tr>
<tr>
<td valign="top" align="right">Estimated genome (1x) size</td>
<td valign="top" align="center">343 Mb</td>
<td valign="top" align="center">372 Mb</td>
<td valign="top" align="center">345 Mb</td>
<td valign="top" align="center">361 Mb</td>
<td valign="top" align="center">372 Mb</td>
</tr>
<tr>
<td valign="top" align="right">Estimated heterozygosity rate</td>
<td valign="top" align="center">0.18%</td>
<td valign="top" align="center">2.30%</td>
<td valign="top" align="center">4.30%</td>
<td valign="top" align="center">3.60%</td>
<td valign="top" align="center">4.10%</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Genome <italic>De Novo</italic> assembly</title>
<p>The four <italic>de novo</italic> chromosome-scale assemblies were constructed using long-reads from Oxford Nanopore Technologies (ONT), short paired-end reads from Illumina DNAseq libraries, and Hi-C libraries generated for each <italic>Syzygium</italic> species (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Tables&#xa0;1, 2</bold>
</xref>).</p>
<p>To prevent assembly artifacts possibly caused by heterozygosity and polyploidy of the <italic>Syzygium</italic> specimens, haplotigs were detected and removed from the polished contigs. The effect of the haplotig removal step was assessed using BUSCO (Benchmarking Universal Single-Copy Orthologs) in genome mode (<xref ref-type="bibr" rid="B60">Sim&#xe3;o et&#xa0;al., 2015</xref>). After the haplotig removal step, the number of complete and duplicated BUSCO genes was considerably reduced in the haplotig-purged contigs (3.3% to 6.1%) when compared to the polished contigs (93.6% to 97.1%) (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1A</bold>
</xref>). Hi-C data enabled the scaffolding of contigs into 11 chromosomes. On the Hi-C contact matrices, a strong intra-chromosomal signal indicates efficient scaffolding, with the 11 chromosomes of each <italic>Syzygium</italic> assembly supported by a high number of their respective Hi-C reads (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1B</bold>
</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Assessment of the efficiency of the haplotig removal step and Hi-C scaffolding. <bold>(A)</bold> BUSCO completeness score comparison of the polished contigs before and after the haplotig removal step for <italic>S. malaccense</italic> (Smal), <italic>S. aqueum</italic> (Saqu), <italic>S. jambos</italic> (Sjam), and <italic>S. syzygioides</italic> (Ssyz) (BUSCO version 5.4.4 - dataset: eudicots_odb10 (n = 2326)). <bold>(B)</bold> Hi-C contact maps showing the Hi-C interactions among the 11 assembled chromosomes and unplaced scaffolds (un) for each species.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1248780-g001.tif"/>
</fig>
<p>The final chromosome-scale assemblies for <italic>S. malaccense</italic> (430 Mb), <italic>S. aqueum</italic> (392 Mb), <italic>S. jambos</italic> (426 Mb), and <italic>S. syzygioides</italic> (431 Mb) consisted of monoploid consensus (11 chromosomes and unplaced sequences) with comparable quality metrics. A high level of quality at the base-scale (quality value [QV] between 44.006 and 45.114), of contiguity (97.5% to 99.8% of the assemblies&#x2019; length anchored on 11 chromosomes) and completeness (BUSCO complete genes scores of 98%) was reached for the four newly assembled <italic>Syzygium</italic> genomes (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>; <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Tables&#xa0;3, 4</bold>
</xref>).</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>BUSCO completeness assessment. Assessment of the final genome assembly, transcript set, and protein set of <italic>S. aromaticum</italic> (Saro), <italic>S. malaccense</italic> (Smal), <italic>S. aqueum</italic> (Saqu), <italic>S. jambos</italic> (Sjam), and <italic>S. syzygioides</italic> (Ssyz) (BUSCO version 5.4.4 - dataset: eudicots_odb10 (n = 2326)).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1248780-g002.tif"/>
</fig>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Assembly and annotation statistics.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left"/>
<th valign="middle" align="center">
<italic>S. malaccense</italic>
</th>
<th valign="middle" align="center">
<italic>S. aqueum</italic>
</th>
<th valign="middle" align="center">
<italic>S. jambos</italic>
</th>
<th valign="middle" align="center">
<italic>S. syzygioides</italic>
</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="middle" align="left">Assembly</th>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">Number of scaffolds</td>
<td valign="middle" align="center">23</td>
<td valign="middle" align="center">54</td>
<td valign="middle" align="center">117</td>
<td valign="middle" align="center">101</td>
</tr>
<tr>
<td valign="middle" align="left">Number of chromosome-scale scaffolds</td>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">11</td>
</tr>
<tr>
<td valign="middle" align="left">Proportion of undetermined bases (N)</td>
<td valign="middle" align="center">0.01%</td>
<td valign="middle" align="center">0.01%</td>
<td valign="middle" align="center">0.02%</td>
<td valign="middle" align="center">0.02%</td>
</tr>
<tr>
<td valign="middle" align="left">QV<sup>1</sup> of the assembly</td>
<td valign="middle" align="center">45.114</td>
<td valign="middle" align="center">44.006</td>
<td valign="middle" align="center">44.028</td>
<td valign="middle" align="center">44.292</td>
</tr>
<tr>
<td valign="middle" align="left">Length of assembly (bp)</td>
<td valign="middle" align="center">429,836,287</td>
<td valign="middle" align="center">391,897,832</td>
<td valign="middle" align="center">426,159,599</td>
<td valign="middle" align="center">431,079,378</td>
</tr>
<tr>
<td valign="middle" align="left">Length of chromosome-scale scaffolds (bp)</td>
<td valign="middle" align="center">429,008,219</td>
<td valign="middle" align="center">386,536,673</td>
<td valign="middle" align="center">415,622,982</td>
<td valign="middle" align="center">424,827,227</td>
</tr>
<tr>
<th valign="middle" align="left">Gene annotation</th>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">Number of predicted genes</td>
<td valign="middle" align="center">30,842</td>
<td valign="middle" align="center">29,879</td>
<td valign="middle" align="center">31,611</td>
<td valign="middle" align="center">32,142</td>
</tr>
<tr>
<td valign="middle" align="left">Number of predicted transcripts</td>
<td valign="middle" align="center">57,144</td>
<td valign="middle" align="center">55,010</td>
<td valign="middle" align="center">57,897</td>
<td valign="middle" align="center">59,495</td>
</tr>
<tr>
<td valign="middle" align="left">Average transcript length (bp)</td>
<td valign="middle" align="center">2010.89</td>
<td valign="middle" align="center">2008.09</td>
<td valign="middle" align="center">1991.19</td>
<td valign="middle" align="center">2007.31</td>
</tr>
<tr>
<td valign="middle" align="left">Average CDS<sup>2</sup> length (bp)</td>
<td valign="middle" align="center">1122.42</td>
<td valign="middle" align="center">1124.09</td>
<td valign="middle" align="center">1116.93</td>
<td valign="middle" align="center">1100.23</td>
</tr>
<tr>
<td valign="middle" align="left">Average exon per transcript</td>
<td valign="middle" align="center">5.62</td>
<td valign="middle" align="center">5.67</td>
<td valign="middle" align="center">5.59</td>
<td valign="middle" align="center">5.66</td>
</tr>
<tr>
<th valign="middle" align="left">Repeat annotation</th>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
<th valign="middle" align="center"/>
</tr>
<tr>
<td valign="middle" align="left">Repeat sequences (bp)</td>
<td valign="middle" align="center">184,916,857<break/>(43.02%)</td>
<td valign="middle" align="center">162,020,435<break/>(41.34%)</td>
<td valign="middle" align="center">180,563,593<break/>(42.37%)</td>
<td valign="middle" align="center">184,003,101<break/>(42.68%)</td>
</tr>
<tr>
<td valign="middle" align="left">LTR<sup>3</sup> retrotransposons (bp)</td>
<td valign="middle" align="center">96,086,564<break/>(22.35%)</td>
<td valign="middle" align="center">74,914,968<break/>(19.12%)</td>
<td valign="middle" align="center">77,407,268<break/>(18.16%)</td>
<td valign="middle" align="center">73,171,928<break/>(16.97%)</td>
</tr>
<tr>
<td valign="middle" align="left">LTR Gypsy (bp)</td>
<td valign="middle" align="center">62,668,430<break/>(14.58%)</td>
<td valign="middle" align="center">48,141,612<break/>(12.28%)</td>
<td valign="middle" align="center">45,090,450<break/>(10.58%)</td>
<td valign="middle" align="center">40,369,474<break/>(9.36%)</td>
</tr>
<tr>
<td valign="middle" align="left">LTR Copia (bp)</td>
<td valign="middle" align="center">31,769,467<break/>(7.39%)</td>
<td valign="middle" align="center">25,148,956<break/>(6.42%)</td>
<td valign="middle" align="center">30,040,740<break/>(7.05%)</td>
<td valign="middle" align="center">30,532,813<break/>(7.08%)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<sup>1</sup>QV, Quality value.</p>
</fn>
<fn>
<p>
<sup>2</sup>CDS, Coding sequence.</p>
</fn>
<fn>
<p>
<sup>3</sup>LTR, Long Terminal Repeat.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Despite their high heterozygosity rate, the quality metrics for the genome assemblies of the diploid specimen <italic>S. malaccense</italic> and the autotetraploids <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic> were comparable to those reported for the <italic>S. aromaticum</italic> assembly (370 Mb) (<xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>). Nevertheless, BUSCO scores revealed a higher percentage of complete and duplicated BUSCOs in the four new assemblies compared to <italic>S. aromaticum</italic> (2.2%), principally in the genome assembly of the three autotetraploid specimens (3.3% to 5.5%) (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>).</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Genome annotation</title>
<p>The average number of protein-coding genes predicted for the four newly assembled genomes is 31,119, representing 26.52% of the genome assemblies&#x2019; size (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>).</p>
<p>The annotation completeness was assessed using the BUSCO method in transcriptome and protein modes and by selecting the whole set of predicted transcripts and proteins for each gene as inputs, respectively (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;3</bold>
</xref>). BUSCO results indicated that the annotation completeness is comparable among the four newly assembled <italic>Syzygium</italic> species, with complete BUSCO scores ranging from 91.9% in <italic>S. aqueum</italic> assembly to 93.5% in <italic>S. malaccense</italic> assembly in transcript mode and from 89.3% in <italic>S. aqueum</italic> assembly to 90.9% in <italic>S. malaccense</italic> assembly in protein mode. BUSCO scores obtained for <italic>S. aromaticum</italic> by using the same assessment methods (95% in transcriptome mode and 93.7% in protein mode) were slightly superior to those of newly assembled genomes but still comparable. The loss of complete BUSCOs between the genome and protein mode assessments ranged from 7.2% in <italic>S. malaccense</italic> assembly to 8.7% in <italic>S. aqueum</italic> assembly, indicating acceptable quality of the predicted gene models and protein sets.</p>
<p>The genome assembly of <italic>S. aromaticum</italic> comprised multiple copies of a gene encoding for putative eugenol synthase (EGS), the enzyme that catalyzes the synthesis of eugenol from coniferyl acetate. In total, 15 copies split into 2 loci were reported: a first locus on chromosome 10 comprising 14 copies and a second locus on chromosome 11 with 1 copy (<xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>). The functional annotation of the four newly assembled <italic>Syzygium</italic> species genomes revealed fewer genes encoding for putative EGS. One gene encoding for putative EGS was identified in the genome assembly of <italic>S. malaccense</italic>, two in the genome assembly of <italic>S. aqueum</italic>, and three copies were found in the genome assemblies of <italic>S. jambos</italic> and <italic>S. syzygioides</italic>. All putative EGS genes were located on chromosome 10 except for one of the three copies of <italic>S. syzygioides</italic> located on chromosome 11 (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;5</bold>
</xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Illustration of the regions of chromosomes 10 and 11 of <italic>S. aromaticum</italic> (Saro), <italic>S. malaccense</italic> (Smal), <italic>S. aqueum</italic> (Saqu), <italic>S. jambos</italic> (Sjam), and <italic>S. syzygioides</italic> (Ssyz) where genes encoding for EGS were predicted. The position (bp) and orientation of the predicted genes on the chromosomes are indicated by arrows colored according to the functional annotation. EGS, accelerated cell death (ACD1), Protochlorophyllide-dependent translocon component Tic52 (PTC52), leucine-rich repeat receptor-like protein kinase (LRR-RLK), Pre-mRNA-processing protein 40C-like (PRP40C), TATA-binding protein-associated factor 7 (TBP-associated factor 7), LTR-RTs.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1248780-g003.tif"/>
</fig>
<p>Effective lengths of repeat elements, which are different from their genomic length, were calculated by removing the length of the nested elements they contained. The proportions of genome assembly length occupied by predicted genes (25.97% to 27.37%) and repeat sequences (41.34% to 43.02%) appear to be conserved among the four newly sequenced <italic>Syzygium</italic> genomes (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). Using the same method, repeat elements in the <italic>Syzygium aromaticum</italic> genome assembly represent 39.98% of the assembly length. The most abundant repeat elements identified in the four newly sequenced <italic>Syzygium</italic> genomes were the LTR-RTs spanning 16.97% of the assembly length for <italic>S. syzygioides</italic> to 22.35% for <italic>S. malaccense</italic>. As reported for <italic>S. aromaticum</italic> and <italic>S. grande</italic>, LTR-RTs belonging to the Gypsy superfamily were more abundant than elements belonging to the Copia superfamily in the four newly sequenced genomes (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Tables&#xa0;6&#x2013;9</bold>
</xref>) (<xref ref-type="bibr" rid="B33">Low et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>).</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Synteny analyses</title>
<p>To identify evolutionary structural changes among the <italic>Syzygium</italic> species chromosomes, we performed a synteny analysis on the four newly assembled genomes, <italic>S. aromaticum</italic> and <italic>S. grande</italic>. The alignment of the 11 chromosomes&#x2019; DNA sequences of the 6 <italic>Syzygium</italic> species revealed a high conservation of the chromosomal organization (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref>).</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Identification of syntenic and rearranged regions between the 11 chromosomes of <italic>S. aromaticum</italic> (Saro), <italic>S. malaccense</italic> (Smal), <italic>S. aqueum</italic> (Saqu), <italic>S. jambos</italic> (Sjam), <italic>S. syzygioides</italic> (Ssyz), and <italic>S. grande</italic> (Sgra). <bold>(A)</bold> Representation of the alignment of the chromosomal DNA sequences showing syntenic regions, interchromosomal, and intrachromosomal rearrangements larger than 20 kb (inversions, translocations, and duplications). Grey arrows indicate regions where rearrangements were reported between chromosomes of <italic>E. grandis</italic> and <italic>S. aromaticum</italic>. <bold>(B)</bold> Pairwise comparison of the percentage of chromosome length occupied by syntenic regions and rearrangements between the chromosome-scale assembly (Chr01-Chr11) and 11 chromosomes (Chr01 to Chr11) of <italic>S. aromaticum</italic> with those of <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, <italic>S. jambos</italic>, <italic>S. syzygioides</italic>, and <italic>S. grande</italic>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1248780-g004.tif"/>
</fig>
<p>No large interchromosomal rearrangements were detected between the chromosomes of the six <italic>Syzygium</italic> species. A high percentage of the five species&#x2019; chromosome lengths were syntenic with <italic>S. aromaticum</italic>, ranging from 68.45% between <italic>S. aromaticum</italic> and <italic>S. jambos</italic> to 73.02% between <italic>S. aromaticum</italic> and <italic>S. aqueum</italic>. Intrachromosomal rearrangements such as inversions, translocations, and duplications between the chromosomes of <italic>S. aromaticum</italic> and those of the other five <italic>Syzygium</italic> species represented 5% of their 11 chromosomes&#x2019; length on average. In terms of number, the most frequent rearrangements observed between <italic>S. aromaticum</italic> and the five other species were duplications and translocations with average numbers of 1348 and 1325, respectively, spanning an average of 0.85% to 1.43% of the 11 chromosome lengths. Inversions were found less frequently for all species but occupied a larger fraction of the genome assemblies&#x2019; length than duplications and translocations except for <italic>S. syzygioides</italic>. The percentage of assembly lengths comprising inversions between <italic>S. aromaticum</italic> and the five other <italic>Syzygium</italic> species ranged from 0.68% between <italic>S. aromaticum</italic> and <italic>S. syzygioides</italic> to 4.83% between <italic>S. aromaticum</italic> and <italic>S. grande</italic>. Overall, the size of the inversions was relatively small. For instance, 11 inversions were detected between chromosome 5 of <italic>S. aromaticum</italic> and <italic>S. grande</italic>, representing 17.32% of the chromosome length of <italic>S. grande</italic> (41,797,999 bp) and 1.87% of its 11 chromosomes (387,620,547 bp) (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4B</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;4</bold>
</xref>). In contrast, the synteny analysis performed between <italic>S. aromaticum</italic> and <italic>E. grandis</italic> revealed 10 intrachromosomal rearrangements on chromosomes 2, 4, 6, 8, 9, and 10 that included large terminal inversions representing up to 40% of the chromosome length of <italic>S. aromaticum</italic>. The other four chromosomes (1, 3, 5, and 7) of the two Myrtaceae species were highly syntenic (<xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>). To further investigate the chromosomal architecture evolution of the <italic>Syzygium</italic> species and verify that these rearrangements were due to biological events rather than assembly artifacts, we also performed DNA alignment of the chromosome sequences of <italic>E. grandis</italic> with those of <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic>. Chromosomes 1, 3, 5, and 7 of <italic>E. grandis</italic> and those of the four newly assembled species were also highly syntenic, and we observed the same 10 rearrangements on chromosomes 2, 4, 6, 8, 9, 10, and 11 (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;3</bold>
</xref>).</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Gene orthology</title>
<p>To investigate the phylogenetic relationships among gene sequences of <italic>S. aromaticum</italic>, <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic>, the sets of predicted protein sequences from the five species assemblies were analyzed using OrthoFinder (<xref ref-type="bibr" rid="B12">Emms and Kelly, 2019</xref>).</p>
<p>A total of 49,269 hierarchical orthogroups (HOGs) were identified, including 93.7% to 95.2% of each species&#x2019; gene set (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref>). Of these, 18,963 (38.5%) HOGs contained genes from all five species, and 4,928 (10%) were species specific. In more detail, 789 were specific to <italic>S. aromaticum</italic>, 950 were specific to <italic>S. malaccense</italic>, 940 HOGs were specific to <italic>S. aqueum</italic>, 1009 HOGs were specific to <italic>S. jambos</italic>, and 1240 HOGs were specific to <italic>S. syzygioides</italic>. Pairwise, <italic>S. aromaticum</italic> and <italic>S. aqueum</italic> appear to share the lowest number of orthogroups (625). The highest number of shared HOGs inferred between each pair of studied species was found between <italic>S. aqueum</italic> and <italic>S. syzygioides</italic> (1218), followed by <italic>S. aqueum</italic> and <italic>S. malaccense</italic> (1152), and <italic>S. jambos</italic> and <italic>S. malaccense</italic> (1027). The species tree resulting from the analysis of the HOGs divided the <italic>Syzygium</italic> species studied into two groups based on closer relationships: the first group comprising <italic>S. aromaticum</italic> and <italic>S. aqueum</italic> and a second group comprising <italic>S. jambos</italic>, <italic>S. malaccense</italic>, and <italic>S. syzygioides</italic> (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5B</bold>
</xref>).</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Hierarchical orthogroups (HOGs) inferred by OrthoFinder between <italic>S. aromaticum</italic> (Saro), <italic>S. malaccense</italic> (Smal), <italic>S. aqueum</italic> (Saqu), <italic>S. jambos</italic> (Sjam), and <italic>S. syzygioides</italic> (Ssyz). <bold>(A)</bold> Number of HOGs inferred by OrthoFinder using the set of predicted proteins for the five <italic>Syzygium</italic> species. <bold>(B)</bold> Rooted species tree inferred by OrthoFinder.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1248780-g005.tif"/>
</fig>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Annotation and comparison of LTR-RTs Gypsy and Copia repertoires</title>
<p>To clarify the dynamic activity of full-length LTR-RTs belonging to the superfamilies Gypsy and Copia within the <italic>Syzygium</italic> genus, we identified the lineages belonging to each superfamily located on the chromosomes of <italic>S. malaccense</italic> (429 Mbp), <italic>S. aqueum</italic> (387 Mbp), <italic>S. jambos</italic> (416 Mbp), and <italic>S. syzygioides</italic> (425 Mbp) and estimated their insertion time. Then, we compared the repertoires&#x2019; compositions and repeat element insertion times of the four species with those of <italic>S. aromaticum</italic> (368 Mbp).</p>
<p>We found that <italic>S. malaccense</italic> and <italic>S. aromaticum</italic>, the largest and smallest chromosome-scale assemblies of this study, contained the highest (8427) and lowest number (6167) of LTR-RTs in Gypsy and Copia, respectively (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6A</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Tables&#xa0;6&#x2013;9</bold>
</xref>). In the five <italic>Syzygium</italic> species&#x2019; chromosomes, we identified a higher number of LTR-RTs for Gypsy than Copia, with a ratio of Gypsy to Copia content ranging from 1.09 for <italic>S. syzygioides</italic> to 1.45 for <italic>S. malaccense</italic>. The Gypsy superfamily comprised a higher proportion of nested elements (17.37% to 24.47%) compared to the Copia superfamily (7.01% to 9.44%), suggesting distinct accumulation and mobile activity of both superfamilies in all five species. Our results revealed little variation in the number of Copia elements on the chromosomes of <italic>S. aqueum</italic> (2705 elements) and <italic>S. aromaticum</italic> (2809), the two smallest chromosome-scale assemblies, and on the chromosomes of <italic>S. syzygioides</italic> (3290), <italic>S. jambos</italic> (3324), and <italic>S. malaccense</italic> (3433). In contrast, we found a notably higher accumulation of Gypsy elements (4994) in the chromosomes of <italic>S. malaccense</italic> compared to the four other species. The ratio of Gypsy content varied from 1.35 when comparing <italic>S. malaccense</italic> with <italic>S. jambos</italic> to 1.49 when comparing <italic>S. malaccense</italic> with <italic>S. aromaticum</italic>. It represented a difference in Gypsy effective length of 19,402,234 bp to 21,766,176 bp, respectively. In the five <italic>Syzygium</italic> chromosome-scale assemblies, the most abundant lineage belonged to the Gypsy superfamily, but it varied according to the species. The Gypsy lineage Tekay was the most represented for <italic>S. aromaticum</italic> (1534 elements), <italic>S. jambos</italic> (1674 elements), and <italic>S. syzygioides</italic> (2090 elements). At the same time, for <italic>S. malaccense</italic> and <italic>S. aqueum</italic>, we found a higher abundance of the Gypsy lineage Ogre (2382 and 1799 elements, respectively).
Among the Gypsy superfamily, the most abundant lineages, Tekay and Ogre, were those with the highest proportion of nested elements (19.10% to 28.55% and 16.69% to 27.92%, respectively) in all five species. For <italic>S. aromaticum</italic>, <italic>S. malaccense</italic>, and <italic>S. syzygioides</italic>, the proportion of nested elements belonging to the Athila lineage was also among the highest identified (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6B</bold>
</xref>; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figures&#xa0;4, 5</bold>
</xref>).</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Composition of the full-length LTR-RTs Gypsy and Copia repertoires. <bold>(A)</bold> Number of elements belonging to the Gypsy and Copia lineages identified on the 11 chromosomes of <italic>S. aromaticum</italic> (Saro), <italic>S. malaccense</italic> (Smal), <italic>S. aqueum</italic> (Saqu), <italic>S. jambos</italic> (Sjam), and <italic>S. syzygioides</italic> (Ssyz). <bold>(B)</bold> Proportion of nested and non-nested elements. Gypsy (others) group comprises the lineages non-chromo-outgroup, Reina, Retand, tatIII, and elements Gypsy to which no lineages were assigned. Copia (others) group comprises the lineages Alesia, Bianca, Gymco-I, Gymco-IV, Gymco-II, and Osser.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1248780-g006.tif"/>
</fig>
<p>Regarding the Copia superfamily, the most represented lineages on the chromosomes of the five <italic>Syzygium</italic> species were Ale (608 to 873 elements), followed by the lineage Tork (456 to 762 elements) for <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic>, and the lineage SIRE (502 elements) for <italic>S. aromaticum</italic>.</p>
<p>The insertion times of 97.13% of the full-length Gypsy and Copia elements identified in the five <italic>Syzygium</italic> species were estimated (33,861 elements). Nearly all elements (97.33%) were inserted in the last 5 million years (32,958 elements) (<xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>). During this time period, distinct insertion activities of the two superfamilies occurred in the five <italic>Syzygium</italic> species.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Distribution of insertion times of full-length LTR-RTs of <italic>S. aromaticum</italic> (Saro), <italic>S. malaccense</italic> (Smal), <italic>S. aqueum</italic> (Saqu), <italic>S. jambos</italic> (Sjam), and <italic>S. syzygioides</italic> (Ssyz). <bold>(A)</bold> LTR-RTs Gypsy. <bold>(B)</bold> LTR-RTs Copia.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1248780-g007.tif"/>
</fig>
<p>Compared to the other four <italic>Syzygium</italic> species, the chromosomes of <italic>S. aromaticum</italic> underwent a more ancient wave of Gypsy insertions (peak at ~2.5 million years ago [Mya]), principally attributed to the Tekay elements, the most abundant lineage in this species (<xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7A</bold>
</xref>). We also found that a few recent insertions (18.02% of insertions) occurred in <italic>S. aromaticum</italic> chromosomes within the last one million years. In contrast, a recent burst of Gypsy insertions (~0&#x2013;1 Mya) occurred in the four other species&#x2019; chromosomes: most insertions of Gypsy in <italic>S. malaccense</italic> (44.53%), <italic>S. aqueum</italic> (44.43%), <italic>S. jambos</italic> (52.55%), and <italic>S. syzygioides</italic> (36.45%) were less than one million years old. We inferred that the high number of Gypsy LTR-RTs found in <italic>S. malaccense</italic> may be attributable to two successive waves of insertions: a peak of Tekay insertions at ~2 Mya and a more recent peak of Ogre at ~1 Mya.</p>
<p>Similar to what we observed for the Gypsy superfamily, the insertion of Copia elements occurred earlier in <italic>S. aromaticum</italic> compared to the four other species, with fewer recent insertions (<xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7B</bold>
</xref>). Compared to the Gypsy elements, a smaller proportion of recent Copia insertions (less than one million years old) were detected in <italic>S. aromaticum</italic> (10.66%), <italic>S. malaccense</italic> (24.16%), <italic>S. aqueum</italic> (26.49%), and <italic>S. jambos</italic> (36.90%) suggesting a distinct recent insertion pattern of the two superfamilies in the four species. However, we found a comparable proportion of Gypsy (36.45%) and Copia (32.22%) elements that were less than one million years old in <italic>S. syzygioides</italic>, the species for which we found the lowest ratio of Gypsy to Copia content (1.09).</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>Plant genome size, ploidy level, and heterozygosity rates are challenges for genome assembly and annotation. However, lower sequencing costs and recent advances in long-read sequencing technologies, Hi-C technologies, and bioinformatics tools have facilitated the generation of assemblies with high contiguity up to the chromosome-scale also for non-model plants or non-major plant crops (<xref ref-type="bibr" rid="B27">Kyriakidou et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B51">Pucker et&#xa0;al., 2022</xref>). Newly assembled and annotated genomes from related species can then be used to perform comparative genomics analyses to investigate plant genome evolution and function. Third-generation long-reads from Oxford Nanopore Technologies and Illumina short-reads combined with the Hi-C technology enabled the <italic>de novo</italic> assembly of the chromosome-scale genome for <italic>S. malaccense, S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic>. A high level of quality at the base level, contiguity, and completeness was reached for the four newly sequenced genomes. The quality of the newly assembled <italic>Syzygium</italic> species genomes was comparable to that of the <italic>S. aromaticum</italic> genome. The slight differences found between the species assemblies&#x2019; quality metrics may be linked to the combined impact of the ploidy level and high heterozygosity rates of the four newly sequenced species on the assembly process.</p>
<p>Previous infrageneric comparative genetic mapping analyses revealed high levels of synteny and collinearity within the <italic>Eucalyptus</italic> genus (<xref ref-type="bibr" rid="B23">Hudson et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B32">Li et&#xa0;al., 2015</xref>). In addition, genomic synteny analyses conducted between the <italic>de novo</italic> assembly of <italic>E. urophylla</italic> &#xd7; <italic>E. grandis</italic> (EUC) and 30 <italic>Eucalyptus</italic> species revealed that the genome structure of EUC, <italic>E. grandis</italic>, and <italic>E. globulus</italic> showed the highest collinearity and the absence of large-scale structural variation. Nevertheless, large structural variations among the different chromosomes of the EUC and other <italic>Eucalyptus</italic> species were also detected (<xref ref-type="bibr" rid="B58">Shen et&#xa0;al., 2023</xref>). We found that the six <italic>Syzygium</italic> genomes studied were highly syntenic. The intrachromosomal rearrangements (duplications, translocations, and inversions) observed between <italic>S. aromaticum</italic> and the five other <italic>Syzygium</italic> species represent a small percentage (~5% on average) of the 11 chromosomes&#x2019; length. These intrachromosomal rearrangements could result from contigs that were not well placed because of Hi-C signals that were not strong enough to correctly determine their position and orientation; however, they may also result from the six species&#x2019; distinct genome evolutions.</p>
<p>Organizational conservation of chromosomes 2, 4, 6, 8, 9, 10, and 11 among the six <italic>Syzygium</italic> species studied constitutes new evidence supporting the 10 intrachromosomal rearrangements previously reported on these chromosomes between <italic>S. aromaticum</italic> and <italic>E. grandis</italic> genomes (<xref ref-type="bibr" rid="B44">Ouadi et&#xa0;al., 2022</xref>). These 10 rearrangements were also observed when aligning the DNA sequences of the chromosomes of <italic>E. grandis</italic> with those of <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides.</italic> Among the rearrangements reported between the chromosomes of <italic>S. aromaticum</italic> and <italic>E. grandis</italic>, similar large terminal inversions on chromosomes 4, 9, 10, and 11 were also reported in the two eucalypts <italic>E. grandis</italic> and <italic>C. citriodora</italic>, suggesting that these terminal inversions occurred on <italic>E. grandis</italic> chromosomes (<xref ref-type="bibr" rid="B4">Butler et&#xa0;al., 2017</xref>). Two other large terminal inversions were detected between chromosomes 4 and 9 of <italic>S. aromaticum</italic> and <italic>E. grandis</italic> but not between <italic>C. citriodora</italic> and <italic>E. grandis</italic>. These inversions were also observed when comparing the chromosome sequences of <italic>E. grandis</italic> with those of the four newly assembled genomes, suggesting that these inversions resulted from an evolution of the chromosome organization rather than from sequencing and assembly artifacts. Further comparative genomics analyses will be needed with additional <italic>Syzygium</italic> and Myrtaceous species to determine if these inversions are specific to the <italic>Syzygium</italic> genus or subgenus, for which the crown ages were estimated at 51.2 Mya and 9.4 Mya, respectively (<xref ref-type="bibr" rid="B33">Low et&#xa0;al., 2022</xref>).</p>
<p>The analyses of the phylogenetic relationships between gene sequences of <italic>S. aromaticum</italic>, <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, <italic>S. jambos</italic>, and <italic>S. syzygioides</italic> and comparisons of their full-length LTR-RTs repertoires provided insights into the distinct genome evolution of each species following the divergence of the <italic>Syzygium</italic> subg. <italic>Syzygium</italic> species 9.4 Mya (<xref ref-type="bibr" rid="B33">Low et&#xa0;al., 2022</xref>). The species tree inferred by OrthoFinder indicated that <italic>S. aromaticum</italic> and <italic>S. aqueum</italic>, as well as <italic>S. malaccense</italic> and <italic>S. jambos</italic>, formed closely related pairs, which is consistent with the genome-level phylogenetic trees generated by Low et&#xa0;al. (<xref ref-type="bibr" rid="B33">2022</xref>). We observed older waves of LTR-RTs Gypsy and Copia insertions in <italic>S. aromaticum</italic> and fewer insertions less than 1 million years old in the <italic>S. aromaticum</italic> chromosomes compared to those of the four other species studied. In plants, the RNA Directed DNA Methylation (RdDM) pathway, a <italic>de novo</italic> DNA methylation mechanism involving small interfering RNA, plays an important role in TE repression (<xref ref-type="bibr" rid="B66">Wambui Mbichi et&#xa0;al., 2020</xref>). Further detailed analysis such as DNA methylation studies will be valuable to clarify the molecular causes of the recent low insertion number of LTR-RTs elements observed in <italic>S. aromaticum</italic>.</p>
<p>
<italic>S. aromaticum</italic> is cultivated to produce clove bud (the dried, unopened flower bud), essential oil (EO), and oleoresins rich in eugenol (<xref ref-type="bibr" rid="B41">Nurdjannah and Bermawie, 2012</xref>). The EO of <italic>S. aromaticum</italic> contains ~72 to 96.6% of eugenol, while the EO of <italic>S. aqueum</italic> has 0.19% eugenol (<xref ref-type="bibr" rid="B54">Razafimamonjison et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B61">Sobeh et&#xa0;al., 2016</xref>). Eugenol is a phenylpropane with multiple pharmaceutical activities and is considered a promising alternative drug for human health (e.g., cancer and pathogenic microorganism resistance, diabetes, obesity, and autoimmune diseases) (<xref ref-type="bibr" rid="B25">Kamatou et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B1">Batiha et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B43">Otunola, 2022</xref>). The genome assembly of <italic>S. aromaticum</italic> was exploited to investigate the genetic basis of this important characteristic. The identification of gene families involved in eugenol biosynthesis revealed the presence of multiple copies of genes encoding EGS, which catalyzes the synthesis of eugenol from coniferyl acetate. A cluster of 14 copies was reported on chromosome 10, and additional copies were located on chromosome 11 of <italic>S. aromaticum</italic>. In the genome assembly of the four newly sequenced species, we found fewer gene copies on chromosome 10 (1 to 3 copies) and no copies on chromosome 11 of <italic>S. malaccense</italic>, <italic>S. aqueum</italic>, and <italic>S. jambos</italic>. The presence of this structural variation suggested that a gene-dosage effect may be associated with the high amount of eugenol. Further studies are needed to elucidate the biological functions of the EGS gene copies in <italic>S. aromaticum</italic> and the four other species (e.g., <italic>in vitro</italic> characterization).</p>
<p>
<italic>S. malaccense</italic>, <italic>S. aqueum</italic>, and <italic>S. jambos</italic> are grown for their edible fruit. Like <italic>S. aromaticum</italic> and other <italic>Syzygium</italic> species, they are also used in traditional medicine. Research on their numerous pharmaceutical properties has been undertaken (e.g., analgesic, anti-inflammatory, antioxidant, hepatoprotective, antidiabetic, antifungal, antibacterial, antiviral, and anticancer activities) (<xref ref-type="bibr" rid="B38">Nair, 2017</xref>; <xref ref-type="bibr" rid="B8">Cock and Cheesman, 2018</xref>). For instance, <italic>S. jambos</italic> is traditionally used to treat hemorrhages, wounds, and ulcers; <italic>S. malaccense</italic> is used to treat mouth ulcers and diabetes; and <italic>S. aqueum</italic> to treat diabetes and childbirth pain (<xref ref-type="bibr" rid="B64">Uddin et&#xa0;al., 2022</xref>). The chromosome-scale assemblies for these species are new valuable resources for the Myrtaceae family. Combined with other comparative genomics and multi-omics studies, they can be used to further investigate the genomic evolution of the Myrtaceous species and to study the genetic basis of important agronomical traits and biosynthesis of secondary metabolites.</p>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/">https://www.ncbi.nlm.nih.gov/</ext-link>, PRJNA962868; <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/">https://www.ncbi.nlm.nih.gov/</ext-link>, PRJNA962711; <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/">https://www.ncbi.nlm.nih.gov/</ext-link>, PRJNA962713; <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/">https://www.ncbi.nlm.nih.gov/</ext-link>, PRJNA962712; <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/genbank/">https://www.ncbi.nlm.nih.gov/genbank/</ext-link>, JASUUE000000000; <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/genbank/">https://www.ncbi.nlm.nih.gov/genbank/</ext-link>, JASUUB000000000; <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/genbank/">https://www.ncbi.nlm.nih.gov/genbank/</ext-link>, JASUUC000000000; <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/genbank/">https://www.ncbi.nlm.nih.gov/genbank/</ext-link>, JASUUD000000000; <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/">https://zenodo.org/</ext-link>, 7870328; <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/">https://zenodo.org/</ext-link>, 7870326; <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/">https://zenodo.org/</ext-link>, 7870330; <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/">https://zenodo.org/</ext-link>, 7870334.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>SO performed the laboratory work, analyzed data, and wrote the manuscript. NS performed computational analysis of sequencing data, conceived and supervised the study, and contributed to manuscript writing. FK and NI conceived and supervised the study and contributed to manuscript writing. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The authors declare that this study received funding from the company Philip Morris International. The funder had the following involvement in the study: the study design, collection, analysis, interpretation of data, the writing of this article and the decision to submit it for publication.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We would like to thank Dr. Leyla Davis, curator of the Masoala Hall in the Z&#xfc;rich Zoo (Switzerland), for authorizing the sampling of the <italic>Syzygium</italic> trees for this project. We would also like to thank Remi Dulize for his technical contributions, and Lindsay Reese and Rebecca Higgins for manuscript revision.</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>Authors SO, NS, and NI were employed by the company Philip Morris International.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2023.1248780/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2023.1248780/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Batiha</surname> <given-names>G. E.-S.</given-names>
</name>
<name>
<surname>Alkazmi</surname> <given-names>L. M.</given-names>
</name>
<name>
<surname>Wasef</surname> <given-names>L. G.</given-names>
</name>
<name>
<surname>Beshbishy</surname> <given-names>A. M.</given-names>
</name>
<name>
<surname>Nadwa</surname> <given-names>E. H.</given-names>
</name>
<name>
<surname>Rashwan</surname> <given-names>E. K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>
<italic>Syzygium aromaticum</italic> L. (Myrtaceae): Traditional uses, bioactive chemical constituents, pharmacological and toxicological activities</article-title>. <source>Biomolecules</source> <volume>10</volume> (<issue>2</issue>), <fpage>202</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/biom10020202</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Beech</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Rivers</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Oldfield</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>GlobalTreeSearch: The first complete global database of tree species and country distributions</article-title>. <source>J. Sustain. For.</source> <volume>36</volume> (<issue>5</issue>), <fpage>454</fpage>&#x2013;<lpage>489</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10549811.2017.1310049</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buchfink</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Huson</surname> <given-names>D. H.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Fast and sensitive protein alignment using DIAMOND</article-title>. <source>Nat. Methods</source> <volume>12</volume> (<issue>1</issue>), <fpage>59</fpage>&#x2013;<lpage>60</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nmeth.3176</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Butler</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Vaillancourt</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Potts</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>D.</given-names>
</name>
<name>
<surname>King</surname> <given-names>G. J.</given-names>
</name>
<name>
<surname>Baten</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Comparative genomics of <italic>Eucalyptus</italic> and <italic>Corymbia</italic> reveals low rates of genome structural rearrangement</article-title>. <source>BMC Genom.</source> <volume>18</volume> (<issue>1</issue>), <fpage>397</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12864-017-3782-7</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>fastp: an ultra-fast all-in-one FASTQ preprocessor</article-title>. <source>Bioinformatics</source> <volume>34</volume> (<issue>17</issue>), <fpage>i884</fpage>&#x2013;<lpage>i890</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/bty560</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Concepcion</surname> <given-names>G. T.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm</article-title>. <source>Nat. Methods</source> <volume>18</volume> (<issue>2</issue>), <fpage>170</fpage>&#x2013;<lpage>175</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41592-020-01056-5</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Christenhusz</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Byng</surname> <given-names>J. W.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>The number of known plants species in the world and its annual increase</article-title>. <source>Phytotaxa</source> <volume>261</volume> (<issue>3</issue>), <fpage>201</fpage>&#x2013;<lpage>217</lpage>. doi: <pub-id pub-id-type="doi">10.11646/phytotaxa.261.3.1</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cock</surname> <given-names>I. E.</given-names>
</name>
<name>
<surname>Cheesman</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Plants of the genus <italic>Syzygium</italic> (Myrtaceae): A review on ethnobotany, medicinal properties and phytochemistry</article-title>. <source>Bioactive Compounds Medicinal Plants: Properties Potential Hum. Health</source>, <fpage>35</fpage>&#x2013;<lpage>84</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1201/b22426</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Craven</surname> <given-names>L. A.</given-names>
</name>
<name>
<surname>Biffin</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>An infrageneric classification of <italic>Syzygium</italic> (Myrtaceae)</article-title>. <source>Blumea-Biodiver. Evol. Biogeogr. Plants</source> <volume>55</volume> (<issue>1</issue>), <fpage>94</fpage>&#x2013;<lpage>99</lpage>. doi: <pub-id pub-id-type="doi">10.3767/000651910X499303</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ellestad</surname> <given-names>P.</given-names>
</name>
<name>
<surname>P&#xe9;rez-Farrera</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Buerki</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Genomic Insights into Cultivated Mexican Vanilla planifolia Reveal High Levels of Heterozygosity Stemming from Hybridization</article-title>. <source>Plants</source> <volume>11</volume> (<issue>16</issue>), <fpage>2090</fpage>. doi: <pub-id pub-id-type="doi">10.3390/plants11162090</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ellinghaus</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Kurtz</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Willhoeft</surname> <given-names>U.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>LTRharvest, an efficient and flexible software for de novo detection of LTR retrotransposons</article-title>. <source>BMC Bioinf.</source> <volume>9</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2105-9-18</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Emms</surname> <given-names>D. M.</given-names>
</name>
<name>
<surname>Kelly</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>OrthoFinder: phylogenetic orthology inference for comparative genomics</article-title>. <source>Genome Biol.</source> <volume>20</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s13059-019-1832-y</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feng</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Kang</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A chromosome-level genome assembly provides insights into ascorbic acid accumulation and fruit softening in guava (<italic>Psidium guajava</italic>)</article-title>. <source>Plant Biotechnol. J.</source> <volume>19</volume> (<issue>4</issue>), <fpage>717</fpage>&#x2013;<lpage>730</lpage>. doi: <pub-id pub-id-type="doi">10.1111/pbi.13498</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Frith</surname> <given-names>M. C.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>A new repeat-masking method enables specific detection of homologous sequences</article-title>. <source>Nucleic Acids Res.</source> <volume>39</volume> (<issue>4</issue>), <fpage>e23</fpage>&#x2013;<lpage>e23</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkq1212</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Niu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>CD-HIT: accelerated for clustering the next-generation sequencing data</article-title>. <source>Bioinformatics</source> <volume>28</volume> (<issue>23</issue>), <fpage>3150</fpage>&#x2013;<lpage>3152</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/bts565</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Girgis</surname> <given-names>H. Z.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Red: an intelligent, rapid, accurate tool for detecting repeats <italic>de-novo</italic> on the genomic scale</article-title>. <source>BMC Bioinf.</source> <volume>16</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>19</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s12859-015-0654-5</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goel</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Schneeberger</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>plotsr: visualizing structural similarities and rearrangements between multiple genomes</article-title>. <source>Bioinformatics</source> <volume>38</volume> (<issue>10</issue>), <fpage>2922</fpage>&#x2013;<lpage>2926</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btac196</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goel</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Jiao</surname> <given-names>W.-B.</given-names>
</name>
<name>
<surname>Schneeberger</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>SyRI: finding genomic rearrangements and local sequence differences from whole-genome assemblies</article-title>. <source>Genome Biol.</source> <volume>20</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s13059-019-1911-0</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grattapaglia</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Vaillancourt</surname> <given-names>R. E.</given-names>
</name>
<name>
<surname>Shepherd</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Thumma</surname> <given-names>B. R.</given-names>
</name>
<name>
<surname>Foley</surname> <given-names>W.</given-names>
</name>
<name>
<surname>K&#xfc;lheim</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Progress in Myrtaceae genetics and genomics: Eucalyptus as the pivotal genus</article-title>. <source>Tree Genet. Genomes</source> <volume>8</volume> (<issue>3</issue>), <fpage>463</fpage>&#x2013;<lpage>508</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11295-012-0491-x</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>McCarthy</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Wood</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Howe</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Durbin</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identifying and removing haplotypic duplication in primary genome assemblies</article-title>. <source>Bioinformatics</source> <volume>36</volume> (<issue>9</issue>), <fpage>2896</fpage>&#x2013;<lpage>2898</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa025</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Healey</surname> <given-names>A. L.</given-names>
</name>
<name>
<surname>Shepherd</surname> <given-names>M.</given-names>
</name>
<name>
<surname>King</surname> <given-names>G. J.</given-names>
</name>
<name>
<surname>Butler</surname> <given-names>J. B.</given-names>
</name>
<name>
<surname>Freeman</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>D. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Pests, diseases, and aridity have shaped the genome of <italic>Corymbia citriodora</italic>
</article-title>. <source>Commun. Biol.</source> <volume>4</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s42003-021-02009-0</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yi</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Helitron distribution in Brassicaceae and whole Genome Helitron density as a character for distinguishing plant species</article-title>. <source>BMC Bioinf.</source> <volume>20</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>20</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s12859-019-2945-8</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hudson</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Kullan</surname> <given-names>A. R.</given-names>
</name>
<name>
<surname>Freeman</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Faria</surname> <given-names>D. A.</given-names>
</name>
<name>
<surname>Grattapaglia</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Kilian</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>High synteny and colinearity among Eucalyptus genomes revealed by high-density comparative genetic mapping</article-title>. <source>Tree Genet. Genomes</source> <volume>8</volume> (<issue>2</issue>), <fpage>339</fpage>&#x2013;<lpage>352</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11295-011-0444-9</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Izuno</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wicker</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Hatakeyama</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Copetti</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Shimizu</surname> <given-names>K. K.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Updated genome assembly and annotation for <italic>Metrosideros polymorpha</italic>, an emerging model tree species of ecological divergence</article-title>. <source>G3-Genes Genom. Genet.</source> <volume>9</volume> (<issue>11</issue>), <fpage>3513</fpage>&#x2013;<lpage>3520</lpage>. doi: <pub-id pub-id-type="doi">10.1534/g3.119.400643</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kamatou</surname> <given-names>G. P.</given-names>
</name>
<name>
<surname>Vermaak</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Viljoen</surname> <given-names>A. M.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Eugenol&#x2014;from the remote Maluku Islands to the international market place: a review of a remarkable and versatile molecule</article-title>. <source>Molecules</source> <volume>17</volume> (<issue>6</issue>), <fpage>6953</fpage>&#x2013;<lpage>6981</lpage>. doi: <pub-id pub-id-type="doi">10.3390/molecules17066953</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kolmogorov</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Pevzner</surname> <given-names>P. A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Assembly of long, error-prone reads using repeat graphs</article-title>. <source>Nat. Biotechnol.</source> <volume>37</volume> (<issue>5</issue>), <fpage>540</fpage>&#x2013;<lpage>546</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41587-019-0072-8</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kyriakidou</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Tai</surname> <given-names>H. H.</given-names>
</name>
<name>
<surname>Anglin</surname> <given-names>N. L.</given-names>
</name>
<name>
<surname>Ellis</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Str&#xf6;mvik</surname> <given-names>M. V.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Current strategies of polyploid plant genome sequence assembly</article-title>. <source>Front. Plant Sci.</source> <volume>9</volume>, <elocation-id>1660</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fpls.2018.01660</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lexa</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jedlicka</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Vanat</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Cervenansky</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kejnovsky</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>TE-greedy-nester: structure-based detection of LTR retrotransposons and their nesting</article-title>. <source>Bioinformatics</source> <volume>36</volume> (<issue>20</issue>), <fpage>4991</fpage>&#x2013;<lpage>4999</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa632</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM</article-title>. <source>arXiv:1303.3997v2</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1303.3997</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Minimap2: pairwise alignment for nucleotide sequences</article-title>. <source>Bioinformatics</source> <volume>34</volume> (<issue>18</issue>), <fpage>3094</fpage>&#x2013;<lpage>3100</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/bty191</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Mei</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Gap-free genome assembly and comparative analysis reveal the evolution and anthocyanin accumulation mechanism of Rhodomyrtus tomentosa</article-title>. <source>Hortic. Res.</source> <volume>10</volume> (<issue>3</issue>), <elocation-id>uhad005</elocation-id>. doi: <pub-id pub-id-type="doi">10.1093/hr/uhad005</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Weng</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). <article-title>Comparative genomics analyses reveal extensive chromosome colinearity and novel quantitative trait loci in Eucalyptus</article-title>. <source>PLoS One</source> <volume>10</volume> (<issue>12</issue>), <elocation-id>e0145144</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0145144</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Low</surname> <given-names>Y. W.</given-names>
</name>
<name>
<surname>Rajaraman</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Tomlin</surname> <given-names>C. M.</given-names>
</name>
<name>
<surname>Ahmad</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Ardi</surname> <given-names>W. H.</given-names>
</name>
<name>
<surname>Armstrong</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Genomic insights into rapid speciation within the world&#x2019;s largest tree genus Syzygium</article-title>. <source>Nat. Commun.</source> <volume>13</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-022-32637-x</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Machado</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Forni-Martins</surname> <given-names>E. R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Psidium cattleyanum Sabine (Myrtaceae), a neotropical polyploid complex with wide geographic distribution: insights from cytogenetic and DNA content analysis</article-title>. <source>Braz. J. Bot.</source> <volume>45</volume> (<issue>3</issue>), <fpage>943</fpage>&#x2013;<lpage>955</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s40415-022-00829-w</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mak</surname> <given-names>Q. C.</given-names>
</name>
<name>
<surname>Wick</surname> <given-names>R. R.</given-names>
</name>
<name>
<surname>Holt</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J. R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Polishing de novo nanopore assemblies of bacteria and eukaryotes with FMLRC2</article-title>. <source>Mol. Biol. Evol.</source> <volume>40</volume> (<issue>3</issue>), <elocation-id>msad048</elocation-id>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msad048</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marcon</surname> <given-names>H. S.</given-names>
</name>
<name>
<surname>Domingues</surname> <given-names>D. S.</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>J. C.</given-names>
</name>
<name>
<surname>Borges</surname> <given-names>R. J.</given-names>
</name>
<name>
<surname>Matioli</surname> <given-names>F. F.</given-names>
</name>
<name>
<surname>de Mattos Fontes</surname> <given-names>M. R.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). <article-title>Transcriptionally active LTR retrotransposons in Eucalyptus genus are differentially expressed and insertionally polymorphic</article-title>. <source>BMC Plant Biol.</source> <volume>15</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>16</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s12870-015-0550-1</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Myburg</surname> <given-names>A. A.</given-names>
</name>
<name>
<surname>Grattapaglia</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Tuskan</surname> <given-names>G. A.</given-names>
</name>
<name>
<surname>Hellsten</surname> <given-names>U.</given-names>
</name>
<name>
<surname>Hayes</surname> <given-names>R. D.</given-names>
</name>
<name>
<surname>Grimwood</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2014</year>). <article-title>The genome of Eucalyptus grandis</article-title>. <source>Nature</source> <volume>510</volume> (<issue>7505</issue>), <fpage>356</fpage>&#x2013;<lpage>362</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nature13308</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Nair</surname> <given-names>K. N.</given-names>
</name>
</person-group> (<year>2017</year>). <source>The genus Syzygium: Syzygium Cumini and Other Underutilized Species</source> (<publisher-loc>United States</publisher-loc>: <publisher-name>CRC Press</publisher-name>).</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Neumann</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Nov&#xe1;k</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Ho&#x161;t&#xe1;kov&#xe1;</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Macas</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Systematic survey of plant LTR-retrotransposons elucidates phylogenetic relationships of their polyprotein domains and provides a reference for element classification</article-title>. <source>Mobile DNA</source> <volume>10</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s13100-018-0144-1</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Niknafs</surname> <given-names>Y. S.</given-names>
</name>
<name>
<surname>Pandian</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Iyer</surname> <given-names>H. K.</given-names>
</name>
<name>
<surname>Chinnaiyan</surname> <given-names>A. M.</given-names>
</name>
<name>
<surname>Iyer</surname> <given-names>M. K.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>TACO produces robust multisample transcriptome assemblies from RNA-seq</article-title>. <source>Nat. Methods</source> <volume>14</volume> (<issue>1</issue>), <fpage>68</fpage>&#x2013;<lpage>70</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nmeth.4078</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Nurdjannah</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Bermawie</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>Cloves</article-title>,&#x201d; in <source>Handbook of herbs and spices</source> (<publisher-loc>Amsterdam, Netherlands</publisher-loc>: <publisher-name>Elsevier</publisher-name>), <fpage>197</fpage>&#x2013;<lpage>215</lpage>.</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oginuma</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Kato</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Tobe</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Mathenge</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Juma</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>1993</year>). <article-title>Chromosomes of some woody plants in Kenya</article-title>. <source>Acta Phytotax. Geobot.</source> <volume>44</volume> (<issue>1</issue>), <fpage>53</fpage>&#x2013;<lpage>58</lpage>.</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Otunola</surname> <given-names>G. A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Culinary spices in food and medicine: an overview of Syzygium aromaticum (L.) Merr. and LM Perry [Myrtaceae]</article-title>. <source>Front. Pharmacol.</source> <volume>12</volume>, <elocation-id>3817</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fphar.2021.793200</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ouadi</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Sierro</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Goepfert</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Bovet</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Glauser</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Vallat</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>The clove (Syzygium aromaticum) genome provides insights into the eugenol biosynthesis pathway</article-title>. <source>Commun. Biol.</source> <volume>5</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s42003-022-03618-z</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Panggabean</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>1991</year>). &#x201c;<article-title>Syzygium aqueum (Burm. f.) Alst., Syzygium malaccense (L.) M. &amp; P., and Syzygium samarangense (Blume) M. &amp; P. <italic>Plant Resources of South-East Asia 2</italic>
</article-title>,&#x201d; in <source>Edible fruits and nuts</source> (<publisher-loc>Pudoc, Wageningen</publisher-loc>: <publisher-name>Pudoc Scientific Publishers</publisher-name>), <fpage>292</fpage>&#x2013;<lpage>294</lpage>.</citation>
</ref>
<ref id="B46">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Parnell</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Craven</surname> <given-names>L. A.</given-names>
</name>
<name>
<surname>Biffin</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>Matters of scale: dealing with one of the largest genera of angiosperms</article-title>,&#x201d; in <source>Reconstructing the tree of life: taxonomy and systematics of species rich taxa</source> (<publisher-loc>Boca Raton, FL</publisher-loc>: <publisher-name>CRC Press LLC</publisher-name>), <fpage>253</fpage>&#x2013;<lpage>270</lpage>.</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedrosa</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gita&#xed;</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>A. E. B.</given-names>
</name>
<name>
<surname>Felix</surname> <given-names>L. P.</given-names>
</name>
<name>
<surname>Guerra</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Cytogenetics of angiosperms collected in the state of Pernambuco: V</article-title>. <source>Acta Bot. Bras.</source> <volume>13</volume> (<issue>1</issue>), <fpage>49</fpage>&#x2013;<lpage>60</lpage>. doi: <pub-id pub-id-type="doi">10.1590/S0102-33061999000100006</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pellicer</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Leitch</surname> <given-names>I. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The Plant DNA C-values database (release 7.1): an updated online repository of plant genome size data for comparative studies</article-title>. <source>New Phytol.</source> <volume>226</volume> (<issue>2</issue>), <fpage>301</fpage>&#x2013;<lpage>305</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/nph.16261</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pertea</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Pertea</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>GFF Utilities: GffRead and GffCompare [version 2; peer review: 3 approved]</article-title>. <source>F1000Research</source> <volume>9</volume> (<issue>304</issue>). doi:&#xa0;<pub-id pub-id-type="doi">10.12688/f1000research.23297.2</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>POWO</collab>
</person-group> (<year>2023</year>). <source>Plants of the World Online. Facilitated by the Royal Botanic Gardens, Kew</source>. Available at: <uri xlink:href="http://www.plantsoftheworldonline.org/">http://www.plantsoftheworldonline.org/</uri>. Retrieved 11 April 2023.</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pucker</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Irisarri</surname> <given-names>I.</given-names>
</name>
<name>
<surname>de Vries</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Plant genome sequence assembly in the era of long reads: Progress, challenges and future directions</article-title>. <source>Quant. Plant Biol.</source> <volume>3</volume> (<issue>5</issue>), <elocation-id>e5</elocation-id>. doi: <pub-id pub-id-type="doi">10.1017/qpb.2021.18</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quinlan</surname> <given-names>A. R.</given-names>
</name>
<name>
<surname>Hall</surname> <given-names>I. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>BEDTools: a flexible suite of utilities for comparing genomic features</article-title>. <source>Bioinformatics</source> <volume>26</volume> (<issue>6</issue>), <fpage>841</fpage>&#x2013;<lpage>842</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btq033</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ranallo-Benavidez</surname> <given-names>T. R.</given-names>
</name>
<name>
<surname>Jaron</surname> <given-names>K. S.</given-names>
</name>
<name>
<surname>Schatz</surname> <given-names>M. C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>GenomeScope 2.0 and Smudgeplot for reference-free profiling of polyploid genomes</article-title>. <source>Nat. Commun.</source> <volume>11</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>10</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-020-14998-3</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Razafimamonjison</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Jahiel</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Duclos</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Ramanoelina</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Fawbush</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Danthu</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Bud, leaf and stem essential oil composition of Syzygium aromaticum from Madagascar, Indonesia and Zanzibar</article-title>. <source>Int. J. Basic Appl. Sci.</source> <volume>3</volume> (<issue>3</issue>), <fpage>224</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.14419/ijbas.v3i3.2473</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saber</surname> <given-names>F. R.</given-names>
</name>
<name>
<surname>Munekata</surname> <given-names>P. E.</given-names>
</name>
<name>
<surname>Rizwan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>El-Nashar</surname> <given-names>H. A.</given-names>
</name>
<name>
<surname>Fahmy</surname> <given-names>N. M.</given-names>
</name>
<name>
<surname>Aly</surname> <given-names>S. H.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Family Myrtaceae: The treasure hidden in the complex/diverse composition</article-title>. <source>Crit. Rev. Food Sci. Nutr.</source>, <fpage>1</fpage>&#x2013;<lpage>19</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10408398.2023.2173720</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kingsford</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Accurate assembly of transcripts through phase-preserving graph decomposition</article-title>. <source>Nat. Biotechnol.</source> <volume>35</volume> (<issue>12</issue>), <fpage>1167</fpage>&#x2013;<lpage>1169</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nbt.4020</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Le</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>SeqKit: a cross-platform and ultrafast toolkit for FASTA/Q file manipulation</article-title>. <source>PLoS One</source> <volume>11</volume> (<issue>10</issue>), <elocation-id>e0163962</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0163962</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ouyang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>E. urophylla &#xd7; E. grandis high-quality genome and comparative genomics provide insights on evolution and diversification of eucalyptus</article-title>. <source>BMC Genom.</source> <volume>24</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>10</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12864-023-09318-0</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Generic repeat finder: a high-sensitivity tool for genome-wide de novo repeat detection</article-title>. <source>Plant Physiol.</source> <volume>180</volume> (<issue>4</issue>), <fpage>1803</fpage>&#x2013;<lpage>1815</lpage>. doi: <pub-id pub-id-type="doi">10.1104/pp.19.00386</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sim&#xe3;o</surname> <given-names>F. A.</given-names>
</name>
<name>
<surname>Waterhouse</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Ioannidis</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Kriventseva</surname> <given-names>E. V.</given-names>
</name>
<name>
<surname>Zdobnov</surname> <given-names>E. M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs</article-title>. <source>Bioinformatics</source> <volume>31</volume> (<issue>19</issue>), <fpage>3210</fpage>&#x2013;<lpage>3212</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btv351</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sobeh</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Braun</surname> <given-names>M. S.</given-names>
</name>
<name>
<surname>Krstin</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Youssef</surname> <given-names>F. S.</given-names>
</name>
<name>
<surname>Ashour</surname> <given-names>M. L.</given-names>
</name>
<name>
<surname>Wink</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Chemical profiling of the essential oils of Syzygium aqueum, Syzygium samarangense and Eugenia uniflora and their discrimination using chemometric analysis</article-title>. <source>Chem. Biodivers.</source> <volume>13</volume> (<issue>11</issue>), <fpage>1537</fpage>&#x2013;<lpage>1550</lpage>. doi: <pub-id pub-id-type="doi">10.1002/cbdv.201600089</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thrimawithana</surname> <given-names>A. H.</given-names>
</name>
<name>
<surname>Jones</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Hilario</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Grierson</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Ngo</surname> <given-names>H. M.</given-names>
</name>
<name>
<surname>Liachko</surname> <given-names>I.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>A whole genome assembly of Leptospermum scoparium (Myrtaceae) for m&#x101;nuka research</article-title>. <source>N. Z. J. Crop Hortic. Sci.</source> <volume>47</volume> (<issue>4</issue>), <fpage>233</fpage>&#x2013;<lpage>260</lpage>. doi: <pub-id pub-id-type="doi">10.1080/01140671.2019.1657911</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tuler</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Carrijo</surname> <given-names>T. T.</given-names>
</name>
<name>
<surname>Peixoto</surname> <given-names>A. L.</given-names>
</name>
<name>
<surname>Garbin</surname> <given-names>M. L.</given-names>
</name>
<name>
<surname>da Silva Ferreira</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>Carvalho</surname> <given-names>C. R.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Diversification and geographical distribution of Psidium (Myrtaceae) species with distinct ploidy levels</article-title>. <source>Trees</source> <volume>33</volume> (<issue>4</issue>), <fpage>1101</fpage>&#x2013;<lpage>1110</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00468-019-01845-2</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Uddin</surname> <given-names>A. N.</given-names>
</name>
<name>
<surname>Hossain</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Reza</surname> <given-names>A. A.</given-names>
</name>
<name>
<surname>Nasrin</surname> <given-names>M. S.</given-names>
</name>
<name>
<surname>Alam</surname> <given-names>A. K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Traditional uses, pharmacological activities, and phytochemical constituents of the genus Syzygium: A review</article-title>. <source>Food Sci. Nutr.</source> <volume>10</volume> (<issue>6</issue>), <fpage>1789</fpage>&#x2013;<lpage>1819</lpage>. doi: <pub-id pub-id-type="doi">10.1002/fsn3.2797</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Van Lingen</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>1991</year>). &#x201c;<article-title>Syzygium jambos (L.) Alston. Plant Resources of South-East Asia 2</article-title>,&#x201d; in <source>Edible fruits and nuts</source> (<publisher-loc>Pudoc, Wageningen</publisher-loc>: <publisher-name>Pudoc Scientific Publishers</publisher-name>), <fpage>296</fpage>&#x2013;<lpage>298</lpage>.</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wambui Mbichi</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Q.-F.</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>RNA directed DNA methylation and seed plant genome evolution</article-title>. <source>Plant Cell Rep.</source> <volume>39</volume>, <fpage>983</fpage>&#x2013;<lpage>996</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00299-020-02558-4</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Warren</surname> <given-names>R. L.</given-names>
</name>
<name>
<surname>Coombe</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Mohamadi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jaquish</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Isabel</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>ntEdit: scalable genome sequence polishing</article-title>. <source>Bioinformatics</source> <volume>35</volume> (<issue>21</issue>), <fpage>4430</fpage>&#x2013;<lpage>4432</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btz400</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wicker</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sabot</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Hua-Van</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bennetzen</surname> <given-names>J. L.</given-names>
</name>
<name>
<surname>Capy</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Chalhoub</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2007</year>). <article-title>A unified classification system for eukaryotic transposable elements</article-title>. <source>Nat. Rev. Genet.</source> <volume>8</volume> (<issue>12</issue>), <fpage>973</fpage>&#x2013;<lpage>982</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nrg2165</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wilson</surname> <given-names>P. G.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Myrtaceae</article-title>,&#x201d; in <source>Flowering Plants. Eudicots</source> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>212</fpage>&#x2013;<lpage>271</lpage>.</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>R.-G.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.-Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.-L.</given-names>
</name>
<name>
<surname>Dainat</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.-X.</given-names>
</name>
<name>
<surname>Ou</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>TEsorter: an accurate and fast method to classify LTR-retrotransposons in plant genomes</article-title>. <source>Hortic. Res.</source> <volume>9</volume>. doi: <pub-id pub-id-type="doi">10.1093/hr/uhac017</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>The chromosome-level Melaleuca alternifolia genome provides insights into the molecular mechanisms underlying terpenoids biosynthesis</article-title>. <source>Ind. Crops Prod.</source> <volume>189</volume>, <fpage>115819</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.indcrop.2022.115819</pub-id>
</citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>C.</given-names>
</name>
<name>
<surname>McCarthy</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Durbin</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>YaHS: yet another Hi-C scaffolding tool</article-title>. <source>Bioinformatics</source> <volume>39</volume> (<issue>1</issue>). doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btac808</pub-id>
</citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>S.-S.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>X.-M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>K.-F.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Nie</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>A comprehensive annotation dataset of intact LTR retrotransposons of 300 plant genomes</article-title>. <source>Sci. Data</source> <volume>8</volume> (<issue>1</issue>), <fpage>174</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41597-021-00968-x</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>