<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2022.822942</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A High-Quality Reference Genome Sequence and Genetic Transformation System of <italic>Aralia elata</italic></article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Wenxuan</given-names>
</name>
<xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<xref rid="fn0001" ref-type="author-notes"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1571791/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Guo</surname>
<given-names>Wenhua</given-names>
</name>
<xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<xref rid="fn0001" ref-type="author-notes"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1598071/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Song</given-names>
</name>
<xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/826018/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xu</surname>
<given-names>Honghao</given-names>
</name>
<xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1574607/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhao</surname>
<given-names>Yue</given-names>
</name>
<xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1684755/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Su</given-names>
</name>
<xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/831568/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>You</surname>
<given-names>Xiangling</given-names>
</name>
<xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<xref rid="c001" ref-type="corresp"><sup>&#x002A;</sup></xref>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>State Key Laboratory of Tree Genetics and Breeding, Northeast Forestry University</institution>, <addr-line>Harbin</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Key Laboratory of Saline-Alkali Vegetation Ecology Restoration, Ministry of Education, Northeast Forestry University</institution>, <addr-line>Harbin</addr-line>, <country>China</country></aff>
<author-notes>
<fn id="fn0002" fn-type="edited-by"><p>Edited by: Fang Du, Beijing Forestry University, China</p></fn>
<fn id="fn0003" fn-type="edited-by"><p>Reviewed by: Liangsheng Zhang, Zhejiang University, China; Sunil Kumar Sahu, Beijing Genomics Institute (BGI), China; Guangpeng Ren, Lanzhou University, China</p></fn>
<corresp id="c001">&#x002A;Correspondence: Xiangling You, <email>youxiangling@nefu.edu.cn</email></corresp>
<fn id="fn0001" fn-type="equal"><p><sup>&#x2020;</sup>These authors have contributed equally to this work</p></fn>
<fn id="fn0004" fn-type="other"><p>This article was submitted to Plant Bioinformatics, a section of the journal Frontiers in Plant Science</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>01</day>
<month>03</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>822942</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>11</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>02</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2022 Liu, Guo, Chen, Xu, Zhao, Chen and You.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Liu, Guo, Chen, Xu, Zhao, Chen and You</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p><italic>Aralia elata</italic> is a perennial woody plant of the genus <italic>Aralia</italic> in the family Araliaceae. It is rich in saponins and therefore has a wide range of pharmacological effects. Here, we report a high-quality reference genome of <italic>A. elata</italic>, with a genome size of 1.21&#x2009;Gb and a contig N50 of 51.34&#x2009;Mb, produced by PacBio HiFi sequencing technology. This is the first genome assembly for the genus <italic>Aralia</italic>. Through genome evolutionary analysis, we explored the phylogeny and whole genome duplication (WGD) events in the <italic>A. elata</italic> genome. The results indicated that a recent WGD event occurred in the <italic>A. elata</italic> genome. Estimation of the divergence times indicated that the WGD may be shared by Araliaceae. By analyzing the genome sequence of <italic>A. elata</italic> and combining the transcriptome data from three tissues, we discovered important genes related to triterpene saponins biosynthesis. Furthermore, based on the embryonic callus induction system of <italic>A. elata</italic> established in our laboratory, we set up the genetic transformation system of this plant. The genomic resources and genetic transformation system obtained in this study provide insights into <italic>A. elata</italic> and lay the foundation for further exploration of the <italic>A. elata</italic> regulatory mechanism.</p>
</abstract>
<kwd-group>
<kwd><italic>Aralia elata</italic></kwd>
<kwd>genome assembly</kwd>
<kwd>evolutionary analysis</kwd>
<kwd>terpenoid biosynthesis pathway</kwd>
<kwd>transgenic system</kwd>
</kwd-group>
<contract-num rid="cn1">30972390</contract-num>
<contract-num rid="cn2">2572018CL02</contract-num>
<contract-sponsor id="cn1">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content></contract-sponsor>
<contract-sponsor id="cn2">Fundamental Research Funds for the Central Universities<named-content content-type="fundref-id">10.13039/501100012226</named-content></contract-sponsor>
<counts>
<fig-count count="6"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="48"/>
<page-count count="11"/>
<word-count count="6997"/>
</counts>
</article-meta>
</front>
<body>
<sec id="sec1" sec-type="intro">
<title>Introduction</title>
<p><italic>Aralia elata</italic> (Miq.) Seem. (Araliaceae), also known as Chinese angelica-tree, is widely distributed in Northeast China (mainly Heilongjiang and Jilin province), Korea, Japan, Russia, the south of Far East, the south of Sakhalin, and Kuril Islands (<xref ref-type="bibr" rid="ref1">Ahn, 1998</xref>; <xref ref-type="bibr" rid="ref35">Reunov et al., 2007</xref>). It is one of the most desirable mountain wild vegetables in Asia benefitting from the rich nutrients in its young shoots (<xref ref-type="bibr" rid="ref7">Cheng et al., 2021</xref>). Furthermore, as a Chinese traditional medicinal plant, <italic>A. elata</italic> plays roles in the treatment of rheumatism, diabetes, hepatitis, neurasthenia, and stomach spasms (<xref ref-type="bibr" rid="ref44">Zhang et al., 2018</xref>), especially the anti-tumor role (<xref ref-type="bibr" rid="ref14">Duan et al., 2019</xref>). These medicinal potentials depend on the bioactive components of saponins. More than 100 kinds of saponins belonging to triterpene saponins have been reported in <italic>A. elata</italic> and they are mainly oleanane-type saponins (<xref ref-type="bibr" rid="ref7">Cheng et al., 2021</xref>).</p>
<p>The triterpenoid biosynthesis is initiated from isopentenyl diphosphate (IPP) that is derived from the metabolism of cytosolic mevalonic acid (MVA) or the plastid methylerythritol phosphate (MEP; <xref ref-type="bibr" rid="ref37">Sawai and Saito, 2011</xref>). This biosynthesis process is catalyzed by a series of key enzymes. The enzymes before triterpenoid structural skeleton formation include farnesyl diphosphate (FPP) synthase (FPS), squalene synthase (SS), and squalene epoxidase (SE). Oxidosqualene cyclases (OSCs) catalyze oxidosqualene to different triterpenoid backbones. <italic>&#x03B2;-AeAS</italic> has been identified to encode the OSC in <italic>A. elata</italic> (<xref ref-type="bibr" rid="ref001">Wu, 2011</xref>). Subsequently, the key enzymes are cytochrome P450 monooxygenases (P450), which mediate oxidations. Uridine diphosphate-dependent glycosyl transferases (UGT) finally catalyze glycosylations to generate different triterpenoid saponins (<xref ref-type="bibr" rid="ref37">Sawai and Saito, 2011</xref>). Recent studies revealed that subfamilies of CYP450, such as CYP71, CYP72, CYP88, CYP93, CYP716, and CYP749 are extensively involved in the oxidative stress response (<xref ref-type="bibr" rid="ref22">Heitz et al., 2012</xref>) and the biosynthesis of triterpenes (<xref ref-type="bibr" rid="ref4">Carelli et al., 2011</xref>; <xref ref-type="bibr" rid="ref17">Fukushima et al., 2011</xref>; <xref ref-type="bibr" rid="ref21">Han et al., 2011</xref>), sterols, indole alkaloids (<xref ref-type="bibr" rid="ref25">Irmler et al., 2000</xref>; <xref ref-type="bibr" rid="ref9">Collu et al., 2001</xref>; <xref ref-type="bibr" rid="ref31">Nafisi et al., 2007</xref>), geraniol iridoid (<xref ref-type="bibr" rid="ref23">H&#x00F6;fer et al., 2013</xref>), etc.</p>
<p>In <italic>A. elata</italic>, genes that potentially encode these key enzymes, including <italic>AeFPS</italic> (<xref ref-type="bibr" rid="ref001">Wu, 2011</xref>), <italic>AeSS</italic> (<xref ref-type="bibr" rid="ref002">Cheng, 2011</xref>), <italic>AeSE</italic> (<xref ref-type="bibr" rid="ref003">Zhao, 2012</xref>), and <italic>&#x03B2;-AeAS</italic> (<xref ref-type="bibr" rid="ref001">Wu, 2011</xref>) have been cloned and investigated by real-time qRT-PCR. In addition, 254 members of P450 and 122 UGT families were identified by the RNA-sequencing analysis (<xref ref-type="bibr" rid="ref8">Cheng et al., 2020</xref>). But for the complex pathway of triterpenoid synthesis, the information of these key enzyme encoding genes is still limited due to the lack of a genome reference of this species.</p>
<p>With the rapid development of sequencing technology and reduction of sequencing cost, more and more plant genomes have been sequenced and published. The third-generation sequencing, especially the High Fidelity (HiFi) technology, has greatly reduced the cost and shortened the cycle of genome sequencing. In this study, we sequenced, assembled, and annotated a high-quality genome of <italic>A. elata</italic> using HiFi data. This is the first genome of the genus <italic>Aralia</italic>. Using comparative genomics, we explored the evolutionary trajectory and whole genome duplication (WGD) events of <italic>A. elata</italic>. We also identified the key enzyme encoding genes involved in the triterpenoid biosynthesis pathway in the genome. The expressional patterns of these genes were preliminarily investigated. Genetic transformation is the most efficient way to further explore the functions of the annotated genes in <italic>A. elata</italic>. However, no transformation system has been established for this non-model plant species. We therefore established an <italic>Agrobacterium tumefaciens</italic> mediated genetic transformation system for <italic>A. elata</italic>, which laid a solid foundation for plant genetic engineering. The genomic resources of <italic>A. elata</italic> provided here will be valuable for biological and breeding research on <italic>Aralia</italic> species and will provide new tools for Araliaceae geneticists and breeders.</p>
</sec>
<sec id="sec2" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="sec3">
<title>Plant Materials, DNA Extraction, and Library Construction</title>
<p>Fresh and healthy leaves of 2-month-old tissue culture plantlets of <italic>A. elata</italic> were harvested and immediately frozen in liquid nitrogen and preserved at &#x2212;80&#x00B0;C. The samples were then sent to the company (Annoroad Gene Technology, China) for DNA extraction. The quality and quantity of the isolated DNA were assessed using a NanoDrop 2000&#x0026;8000 spectrophotometer and a Qubit 2.0 Fluorometer, respectively. Illumina and PacBio libraries were constructed using the eligible DNA following the instruction for each technology, respectively.</p>
</sec>
<sec id="sec4">
<title>Genome Sequencing, Assembly, and Quality Assessment</title>
<p>We integrated Illumina HiSeq and PacBio HiFi sequencing data to achieve the complete genome sequence of <italic>A. elata</italic>. The Illumina library was sequenced on the Illumina HiSeq X Ten platform following standard Illumina protocols. After filtering out adapter sequences, low-quality reads, and duplicated reads, the clean reads were used to investigate the genomic features including genome size and heterozygosity by <italic>k</italic>-mer distribution analysis using GenomeScope (<xref ref-type="bibr" rid="ref41">Vurture et al., 2017</xref>). Two libraries were constructed for PacBio HiFi sequencing. The subreads generated from the PacBio libraries were assembled into contigs using hifiasm with the default parameters (<xref ref-type="bibr" rid="ref6">Cheng et al., 2008</xref>). The Illumina sequencing reads were aligned to the genome assembly using BWA (<xref ref-type="bibr" rid="ref29">Li, 2013</xref>) to assess its completeness. Benchmarking Universal Single-Copy Orthologs (BUSCO) was also used to assess the quality of the final genome assembly (<xref ref-type="bibr" rid="ref38">Sim&#x00E3;o et al., 2015</xref>).</p>
</sec>
<sec id="sec5">
<title>Genome Annotation</title>
<p>The <italic>A. elata</italic> genome was annotated by the integration of multiple strategies including <italic>de novo</italic>, homology-based, and transcriptome-based predictions. Repeat Masker and Repeat Modeler were used to identify the repetitive sequences in the genome based on repeat sequence database. Augustus was used for <italic>de novo</italic> prediction of protein coding genes based on the repeat masked genome. For similarity-based gene prediction, eight species including <italic>Arabidopsis thaliana</italic>, <italic>Oryza sativa</italic>, <italic>Daucus carota</italic>, <italic>Populus trichocarpa</italic>, <italic>Apium graveolens</italic>, <italic>Vitis vinifera</italic>, <italic>Panax notoginseng</italic>, and <italic>Coriandrum sativum</italic> were selected, and the protein sequences of these species were downloaded from Phytozome.<xref rid="fn0005" ref-type="fn"><sup>1</sup></xref> Annotation of coding genes in the genome was subsequently performed using these homologous proteins. BLAST with identity&#x2009;&#x2265;&#x2009;0.95 and coverage&#x2009;&#x2265;&#x2009;0.90 as thresholds was used to identify genes with significant similarity in the <italic>A. elata</italic> genome. To carry out the RNA-Seq aided gene prediction, we downloaded the transcriptome data of <italic>A. elata</italic> from NCBI SRA database (BioProject: PRJNA555256). The clean reads were assembled into transcripts using Trinity (<xref ref-type="bibr" rid="ref19">Haas et al., 2013</xref>), which were aligned against the genome assembly for gene structure prediction using Program to Assemble Spliced Alignments (PASA; <xref ref-type="bibr" rid="ref20">Haas et al., 2008</xref>). The gene sets predicted by the various strategies were integrated into a non-redundant and more complete gene set by Evidence Modeler (EVM; <xref ref-type="bibr" rid="ref20">Haas et al., 2008</xref>). BUSCO was used to evaluate the integrity and completeness of the predicted gene set.</p>
</sec>
<sec id="sec6">
<title>Analysis of Genomic Evolution and WGD Events</title>
<p>We used OrthoFinder to identify the orthologous groups in 12 species: five species from Apiales including <italic>A. elata</italic>, <italic>Panax notoginseng</italic>, <italic>Daucus carota</italic>, <italic>Apium graveolens</italic>, and <italic>Coriandrum sativum</italic>, two species from Asterales including <italic>Lactuca sativa</italic> and <italic>Taraxacum mongolicum</italic>, one species from Tubiflorae (<italic>Capsicum annuum</italic>), three other dicot species including <italic>Arabidopsis thaliana</italic>, <italic>Carica papaya</italic> and <italic>Populus trichocarpa</italic>, and one monocot <italic>Oryza sativa</italic>, which was used as the outgroup (<xref ref-type="bibr" rid="ref18">Guo et al., 2021</xref>). MUSCLE was used for multiple sequence alignment for each single-copy orthologous group identified by OrthoFinder (<xref ref-type="bibr" rid="ref15">Emms and Kelly, 2019</xref>). All the alignment blocks were then manually concatenated, and the substitution model for each alignment block was estimated using ModelTest-NG (<xref ref-type="bibr" rid="ref11">Darriba et al., 2020</xref>) program. The results were subsequently used to construct a phylogenetic tree using the maximum-likelihood algorithm. Divergence times of these species in the phylogenetic tree were estimated with MCMCtree (v4.0) using the Bayesian Relaxed Molecular Clock (BRMC) approach (<xref ref-type="bibr" rid="ref42">Yang, 2007</xref>). The parameters of MCMCtree were set as follows: burn-in&#x2009;=&#x2009;2,000; sample-frequency&#x2009;=&#x2009;10; and sample-number&#x2009;=&#x2009;20,000. <italic>Oryza sativa</italic> was designated as an outgroup of the phylogenetic tree. The calibration times of each divergent node were obtained from the TimeTree website (<xref ref-type="bibr" rid="ref28">Kumar et al., 2017</xref>). 
Gene family expansion and contraction were analyzed by CAF&#x00C9; using the phylogenetic tree and gene numbers in each orthogroup (<xref ref-type="bibr" rid="ref12">De Bie et al., 2006</xref>).</p>
</sec>
<sec id="sec7">
<title>Identification and Tissue Specific Expression of Genes Involved in Triterpene Saponins Biosynthesis</title>
<p>BLASTP, with E-value of 1e&#x2212;5 as a threshold, was used to identify candidate enzymes that catalyze triterpene saponins biosynthesis. The NCBI Conserved Domain Database (<xref ref-type="bibr" rid="ref30">Marchler-Bauer et al., 2010</xref>) was used to scan conserved domains in these candidates. Only the protein sequences containing canonical domains were identified as authentic enzymes. IQ-TREE (<xref ref-type="bibr" rid="ref32">Nguyen et al., 2015</xref>) was used to construct phylogenetic trees for these protein sequences. The expressional profiles of genes encoding these enzymes were investigated using RNA-seq data retrieved from public databases (PRJNA555256). The gene expression analysis was performed using the nf-core/rnaseq v3.2 (<xref ref-type="bibr" rid="ref16">Ewels et al., 2020</xref>) pipeline in nextflow v21.04.1 (<xref ref-type="bibr" rid="ref13">Di Tommaso et al., 2017</xref>). The sequencing reads were mapped to the reference genome using Spliced Transcripts Alignments to a Reference V2.7.6a (STAR) as an aligner. Gene expression levels were then determined by using RNA-Seq by Expectation-Maximization v1.3.1 (RSEM). Trimmed mean of M value (TMM; <xref ref-type="bibr" rid="ref36">Robinson and Oshlack, 2010</xref>) method was used to normalize and measure the expression levels of these samples.</p>
</sec>
<sec id="sec8">
<title>Establishment of <italic>Agrobacterium</italic> Mediated Genetic Transformation for <italic>Aralia elata</italic></title>
<p>The system of vegetative propagation of <italic>A. elata</italic> was built in our lab (<xref ref-type="bibr" rid="ref10">Dai et al., 2011</xref>). The somatic embryogenic callus was induced from the roots of the somatic embryo plants in the induction medium (1/2 SH medium with 3.0&#x2009;mg/L of IBA and 0.2&#x2009;mg/L of KT) for 3&#x2009;weeks. When the callus was transferred to the re-differentiation medium (1/2 SH with 1.0&#x2009;mg/L IBA and 0.2&#x2009;mg/L KT), many somatic embryos or plants emerged after 6&#x2009;weeks.</p>
<p>The roots of the above somatic embryo plants were used for <italic>Agrobacterium tumefaciens</italic> mediated genetic transformation. After 3&#x2009;days of pre-culture, the roots were infected by <italic>A. tumefaciens</italic> for 5, 10, and 15&#x2009;min, respectively, and then co-cultured in medium for 3&#x2009;days. Next, they were transferred to the selection medium with 50&#x2009;mg/L kanamycin and 200&#x2009;mg/L timentin. After 8&#x2009;weeks, the calli were checked by PCR using the primers 5&#x2032;-CGC ACA ATC CCA CTA TCC TT-3&#x2032; and 5&#x2032;-AAG ACC GGC AAC AGG ATT C-3&#x2032; to select the transformed callus lines. The positive callus lines were transferred to the above plant medium.</p>
</sec>
</sec>
<sec id="sec9" sec-type="results">
<title>Results</title>
<sec id="sec10">
<title>Genome Sequencing, Assembly, and Annotation</title>
<p>To investigate the genomic features of <italic>A. elata</italic>, 17, 21, 25, and 27 <italic>K</italic>-mer distribution analysis was performed (<xref rid="fig1" ref-type="fig">Figure 1A</xref>; <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1</xref>), respectively, using 56.89&#x2009;Gb of the Illumina reads. The Illumina reads represented 50.79&#x00D7; coverage based on the estimated genome size of 1.12&#x2009;Gb (<italic>K</italic>-mer analysis; <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>). The <italic>K</italic>-mer distributions followed a Poisson distribution, with two peaks corresponding to homozygous and heterozygous sequences, respectively (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1</xref>). According to the <italic>K</italic>-mer distribution analysis, the genome size of <italic>A. elata</italic> was estimated as 1.08&#x2013;1.14&#x2009;Gb and the heterozygosity ratio of the genome was estimated as 1.60&#x2013;1.69% (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 2</xref>). The results indicated that the genome of <italic>A. elata</italic> is highly heterozygous and repetitive. We then used HiFi technologies to sequence the <italic>A. elata</italic> genome. A total of 51.14&#x2009;Gb of HiFi reads from two libraries were obtained for the genome assembly. A total of 25.75 and 25.39&#x2009;Gb data were generated from the two libraries, respectively (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 3</xref>). The HiFi reads were assembled into contigs using hifiasm. The final assembled genome was 1.21&#x2009;Gb in size with a contig N50 length of 51.34&#x2009;Mb. The genome assembly contained 1,350 contigs, the longest contig was 100.88&#x2009;Mb, and the average contig length was 0.89&#x2009;Mb. The GC content of the <italic>A. elata</italic> genome is 36.13% (<xref rid="tab1" ref-type="table">Table 1</xref>).</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption><p><bold>(A)</bold> 17-mer analysis for estimating the genome size of <italic>Aralia elata</italic>. <bold>(B)</bold> Assessment of the gene coverage rate of genome assembly and annotation using Benchmarking Universal Single-Copy Orthologs (BUSCO).</p></caption>
<graphic xlink:href="fpls-13-822942-g001.tif"/>
</fig>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption><p>Assembly statistics of the <italic>Aralia elata</italic> genome.</p></caption>
<table frame="hsides" rules="groups">
<tbody>
<tr>
<th align="left" valign="top">N50 contig size (bp)</th>
<th align="center" valign="top">51,342,398</th>
</tr>
<tr>
<td align="left" valign="middle">L50 contig number</td>
<td align="center" valign="middle">8</td>
</tr>
<tr>
<td align="left" valign="middle">N75 contig size (bp)</td>
<td align="center" valign="middle">38,678,071</td>
</tr>
<tr>
<td align="left" valign="middle">L75 contig number</td>
<td align="center" valign="middle">15</td>
</tr>
<tr>
<td align="left" valign="middle">N90 contig size (bp)</td>
<td align="center" valign="middle">17,066,756</td>
</tr>
<tr>
<td align="left" valign="middle">L90 contig number</td>
<td align="center" valign="middle">22</td>
</tr>
<tr>
<td align="left" valign="middle">Longest contig (bp)</td>
<td align="center" valign="middle">100,882,612</td>
</tr>
<tr>
<td align="left" valign="middle">Shortest contig (bp)</td>
<td align="center" valign="middle">10,694</td>
</tr>
<tr>
<td align="left" valign="middle">Average contig (bp)</td>
<td align="center" valign="middle">893,717</td>
</tr>
<tr>
<td align="left" valign="middle">Total length (bp)</td>
<td align="center" valign="middle">1,206,518,707</td>
</tr>
<tr>
<td align="left" valign="middle">Total N length (bp)</td>
<td align="center" valign="middle">0</td>
</tr>
<tr>
<td align="left" valign="middle">Number of contigs</td>
<td align="center" valign="middle">1,350</td>
</tr>
<tr>
<td align="left" valign="middle">GC content (%)</td>
<td align="center" valign="middle">36.13</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To evaluate the completeness of the genome assembly, short reads generated for the genomic survey were mapped to the genome. In total, 99.95% of the short reads were mapped to the contigs, 99.48% of which were properly pair-end mapped (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 4</xref>). The completeness of the genome assembly was also evaluated by BUSCO. The result revealed that the genome covered at least 98.8% of the BUSCO genes, 87.2% of which were classified as &#x201C;complete and single-copy,&#x201D; 11.6% as &#x201C;complete and duplicated,&#x201D; 0.6% as &#x201C;fragmented,&#x201D; and 0.6% as &#x201C;missing&#x201D; (<xref rid="fig1" ref-type="fig">Figure 1B</xref>; <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 5</xref>). All the results suggested a high quality of the <italic>A. elata</italic> genome assembly.</p>
<p>Repetitive sequences, including tandem repeat and interspersed repeats, are important parts of genomes. In this study, two strategies, <italic>de novo</italic> prediction and homology-based identification, were used to annotate the repetitive sequences in the <italic>A. elata</italic> genome. According to the integrated results obtained above, the proportion of repetitive sequences in the genome was 71.69%, which was higher than carrot (45.95%; <xref ref-type="bibr" rid="ref24">Iorizzo et al., 2016</xref>). The most abundant type of repetitive elements was long terminal repeat (LTR), which accounted for 49.15% of the genome, while DNA transposon repetitive sequences accounted for only 3.86% of the genome (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 6</xref>).</p>
<p>To annotate the protein coding genes in the <italic>A. elata</italic> genome, we used a combination of <italic>ab initio</italic> prediction, homology-based search, and transcript evidence from RNA-seq data. Finally, a total of 37,016 genes were annotated in the genome. We evaluated the completeness and quality of the annotated proteome through BUSCO using Embryophyta_odb10 as database. The results indicated that 97.7% of the conserved genes were annotated in the genome, which included 93.7 and 4.0% complete and fragmented BUSCO genes, respectively. The BUSCO assessment indicated that the annotation of genome was of high accuracy (<xref rid="fig1" ref-type="fig">Figure 1B</xref>; <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 7</xref>).</p>
</sec>
<sec id="sec11">
<title>Genome Evolution of <italic>Aralia elata</italic></title>
<p>In order to reveal the evolutionary position of <italic>A. elata</italic>, we compared the genome assembly with genomes from 11 other plants. A total of 250 single-copy gene families were identified among these species by OrthoFinder. These single-copy genes were used to construct a phylogenetic tree using a maximum likelihood method. Consistent with Angiosperm Phylogeny Group, <italic>A. elata</italic> was close to <italic>P. notoginseng</italic>, another Araliaceae species, and these two species were grouped into a clade. This clade was most closely related to the species from the family Apiaceae (<xref rid="fig2" ref-type="fig">Figure 2A</xref>). The divergence times of these species were then estimated based on the phylogenetic tree. We estimated that <italic>A. elata</italic> and <italic>P. notoginseng</italic> diverged from Apiaceae at approximately 80.1 million years ago (mya). <italic>Aralia elata</italic> and <italic>P. notoginseng</italic> subsequently diverged into two species at around 24.2 mya. These results showed that the relationship between <italic>A. elata</italic> and <italic>P. notoginseng</italic> is very close. In addition, we performed a comparative analysis of gene family evolution in the phylogenetic tree. A total of 1,925 gene families were expanded in the <italic>A. elata</italic> lineage, whereas 1,832 gene families had undergone contraction (<xref rid="fig2" ref-type="fig">Figure 2A</xref>).</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption><p><bold>(A)</bold> Inferred phylogenetic tree of <italic>Aralia elata</italic> and 11 plant species based on protein sequences of single-copy orthologous genes. Numbers at each node represent the estimated divergence time of each node in million years ago (mya). Gene family expansions are indicated in red, and gene family contractions are indicated in green. <bold>(B)</bold> Ks distribution of paralogous gene pairs in the <italic>A. elata</italic>, <italic>Panax notoginseng</italic>, and <italic>Panax ginseng</italic> genome. The probability density of Ks was estimated using the &#x201C;density&#x201D; function in the R language. <bold>(C)</bold> Distribution diagram of 4DTv values. The dark black-filled part indicates the 4DTv analysis inside <italic>A. elata</italic>, and the peaks marked by the dotted line indicate where the two whole genome duplication (WGD) events of <italic>A. elata</italic> occurred. <bold>(D)</bold> Collinear analysis among <italic>Daucus carota</italic>, <italic>A. elata</italic>, and <italic>Vitis vinifera</italic> genome. The blue lines in the genomes of <italic>A. elata</italic> and <italic>V. vinifera</italic> indicate the 2:1 correspondence between the two collinear regions. <bold>(E)</bold> Venn diagram showing the cluster distribution of shared gene families among <italic>A. elata</italic>, <italic>C. sativum</italic>, <italic>P. notoginseng</italic>, <italic>D. carota</italic>, and <italic>A. thaliana</italic>.</p></caption>
<graphic xlink:href="fpls-13-822942-g002.tif"/>
</fig>
<p>The gene family analysis among these species revealed that the 33,499 genes in the <italic>A. elata</italic> genome were clustered into 15,637 gene families with an average size of 2.14. The members in the gene families varied greatly, and the largest family contained 277 genes. We then investigated the specific and shared gene families among the species of <italic>A. elata</italic>, <italic>C. sativum</italic>, <italic>P. notoginseng</italic>, <italic>A. thaliana</italic>, and <italic>D. carota</italic>. The results indicated that 10,021 gene families were observed in all the investigated species, and 1,031 gene families appeared to be lineage specific to <italic>A. elata</italic> (<xref rid="fig2" ref-type="fig">Figure 2E</xref>).</p>
<p>Whole genome duplication occurs widely in flowering plants and plays important roles in genome evolution, the formation of new species, and gene neofunctionalization (<xref ref-type="bibr" rid="ref33">Piegu et al., 2006</xref>; <xref ref-type="bibr" rid="ref40">Van de Peer et al., 2009</xref>). Previous results indicated that two species in Araliaceae, <italic>P. notoginseng</italic> and <italic>Panax ginseng</italic>, have experienced one and two recent WGD events, respectively (<xref ref-type="bibr" rid="ref27">Kim et al., 2018</xref>; <xref ref-type="bibr" rid="ref26">Jiang et al., 2021</xref>). To further explore the evolutionary trajectory of <italic>A. elata</italic>, we investigated the WGD events in its genome. The protein sequences from the <italic>A. elata</italic> genome were searched against themselves using BLASTP (E&#x2009;&#x003C;&#x2009;1e&#x2212;5) to identify homologous gene pairs (<xref ref-type="bibr" rid="ref3">Camacho et al., 2009</xref>). We calculated the 4DTv (4-fold degenerate synonymous sites at the third codon positions) for the optimal gene pairs and plotted the distribution of the 4DTv values (<xref rid="fig2" ref-type="fig">Figure 2C</xref>). Two peaks were observed at approximately 0.12 and 0.50, respectively. The right peak at approximately 0.50 revealed the core eudicot gamma triplication event. The left peak at approximately 0.12 indicated that <italic>A. elata</italic> underwent a recent WGD event. We then investigated the syntenic blocks between <italic>V. vinifera</italic> and <italic>A. elata</italic> using MCScanX to further confirm the WGD event in <italic>A. elata</italic>, because <italic>V. vinifera</italic> has not undergone any recent WGDs. A 2:1 syntenic relationship between <italic>A. elata</italic> and <italic>V. vinifera</italic> (<xref rid="fig2" ref-type="fig">Figure 2D</xref>) was observed, which confirmed that a recent WGD event occurred in the <italic>A. elata</italic> genome.</p>
<p><italic>Ks</italic> (synonymous substitution rate) values can be used to estimate the timing of large-scale duplications (<xref ref-type="bibr" rid="ref2">Blanc and Wolfe, 2004</xref>). We calculated the <italic>Ks</italic> values of the gene pairs and plotted the distributions to estimate the occurrence time of the WGD events of <italic>A. elata</italic>, <italic>P. notoginseng</italic>, and <italic>P. ginseng</italic>, respectively (<xref rid="fig2" ref-type="fig">Figure 2B</xref>; <xref ref-type="bibr" rid="ref5">Chen et al., 2020</xref>). Two peaks were observed in the <italic>Ks</italic> distributions of the <italic>P. notoginseng</italic> and <italic>A. elata</italic> genomes, whereas the <italic>P. ginseng</italic> genome contained three <italic>Ks</italic> peaks. The <italic>Ks</italic> distribution result of <italic>A. elata</italic> was consistent with the 4DTv values. The main peak at approximately 0.38 indicated that a recent WGD event occurred in the <italic>A. elata</italic> genome. Similar <italic>Ks</italic> peaks around 0.38 were also found in the <italic>P. notoginseng</italic> and <italic>P. ginseng</italic> genomes, which indicated that the recent WGD event may be shared by <italic>A. elata</italic>, <italic>P. notoginseng</italic>, and <italic>P. ginseng</italic>. Then we calculated the time of the WGD event of <italic>A. elata</italic> according to the method reported (<xref ref-type="bibr" rid="ref34">Qin et al., 2014</xref>). The WGD event was estimated to have occurred at approximately 29.6 mya in the <italic>A. elata</italic> genome. Because the divergence time of <italic>A. elata</italic> and <italic>P. notoginseng</italic> was estimated to be 24.2 mya, this WGD event may have occurred before the differentiation of the two species. This is consistent with the published result (<xref ref-type="bibr" rid="ref26">Jiang et al., 2021</xref>). All the results above indicated that unlike <italic>P. ginseng</italic>, which experienced an extra genus-specific WGD, the <italic>A. elata</italic> and <italic>P. notoginseng</italic> genomes experienced only one recent WGD (<xref ref-type="bibr" rid="ref27">Kim et al., 2018</xref>). In addition, this WGD may be shared by species in Araliaceae.</p>
</sec>
<sec id="sec12">
<title>Analysis of Key Enzyme Encoding Genes Involved in Triterpene Saponins Biosynthesis</title>
<p>The biosynthesis pathways of terpenoids in plants have been comprehensively explained. Research on <italic>Aralia</italic> Linn. plants has attracted extensive interest from researchers. By integrating sequence similarity, conserved domain, and phylogenetic relationship results (<xref rid="fig3" ref-type="fig">Figure 3</xref>; <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 2</xref>; <xref ref-type="supplementary-material" rid="SM2">Supplementary Table 8</xref>), we identified 22 candidate genes encoding the enzymes that may catalyze the biosynthesis processes of terpenoids in the <italic>A. elata</italic> genome (<xref rid="fig4" ref-type="fig">Figure 4</xref>). We used transcriptome sequencing data of <italic>A. elata</italic> downloaded from a public database to investigate the expression profiles of these genes. The RNA-seq reads were aligned to the genome assembly to obtain the expression levels of these genes in roots, stems, and leaves. <xref rid="fig4" ref-type="fig">Figure 4</xref> illustrates the normalized expression levels of these enzyme-coding genes in each tissue. The results indicated that many genes appeared to be expressed in a tissue-specific manner. For example, genes encoding CYP450s are abundantly expressed in stems and roots.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption><p>Phylogenetic analysis of P450 genes. Genes that putatively encode Cytochromes P450 were identified by the integration of sequence similarities, conserved domains, and the phylogeny. The colored clades in the phylogenetic tree include genes encoding the subfamilies of CYP72A and CYP716A, which are the key enzymes of P450 participated in the biosynthetic pathway of triterpene saponins. Genes marked in purple are reported to catalyze the synthesis of triterpene saponins in other species.</p></caption>
<graphic xlink:href="fpls-13-822942-g003.tif"/>
</fig>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption><p>Overview of the saponin biosynthetic pathway in <italic>Aralia elata</italic> and expression profiles of key enzyme encoding genes. The histogram shows the expression levels of different genes in different tissues. Green indicates the amount of expression in the leaves, red indicates the amount of expression in the stem, and blue indicates the amount of expression in the root. Most genes encoding enzymes involved in the saponin biosynthesis pathway were more highly expressed in roots than in leaves and stems.</p></caption>
<graphic xlink:href="fpls-13-822942-g004.tif"/>
</fig>
<p>Previous studies have shown that CYP72A and CYP716A subfamily members are the main CYP450s involved in the biosynthesis of pentacyclic triterpene saponins. Therefore, we paid special attention to the four CYP716A and three CYP72A coding genes identified in the <italic>A. elata</italic> genome. At the same time, 12 genes related to the terpene skeleton and triterpene biosynthesis pathway were identified, including the MVA pathway and the 2,3-oxidosqualene biosynthesis pathway. Among them, six enzymes (AACT, HMGS, HMGR, MK, PMK, and MVD) were associated with the MVA pathway.</p>
<p>In the MVA pathway, Acetyl-CoA is synthesized into Acetoacetyl-CoA, which is catalyzed by the AACT enzyme (encoded by <italic>Arel.002085</italic>). The expression level of <italic>Arel.002085</italic> in leaves and roots is slightly higher than that in stems. Acetoacetyl-CoA is synthesized into 3-Hydroxy-3-methylglutaryl-CoA, which is catalyzed by HMGS enzyme (encoded by <italic>Arel.020097</italic>). HMGR enzyme (encoded by <italic>Arel.004178</italic>) subsequently catalyzes the synthesis of Mevalonic acid, and then MK enzyme (encoded by <italic>Arel.022041</italic>) is used to catalyze the synthesis of Mevalonic acid-5P. The expression levels of <italic>Arel.020097</italic>, <italic>Arel.004178</italic>, and <italic>Arel.022041</italic> in roots were higher than those in stems and leaves, indicating that the biosynthetic reaction mainly occurred in the roots of <italic>A. elata</italic>. Then, under the catalysis of PMK enzyme (encoded by <italic>Arel.027136</italic>), Mevalonic acid-5-pyrophosphate is generated, which is converted into Isopentenyl pyrophosphate by MVD enzyme (encoded by <italic>Arel.003662</italic>); Isopentenyl pyrophosphate is then converted by IPPI enzyme (encoded by <italic>Arel.030181</italic>) to form Dimethylallyl pyrophosphate. Further condensation of Isopentenyl pyrophosphate and Dimethylallyl pyrophosphate forms various terpenoids. Except for <italic>Arel.003662</italic>, whose expression levels in leaves and roots are slightly higher than those in stems, the expression levels of <italic>Arel.027136</italic> and <italic>Arel.030181</italic> in stems are slightly higher than those in leaves and roots. The results indicated that the biosynthetic reactions in this part of the pathway may occur in the stems.</p>
<p>After that, Isopentenyl pyrophosphate and Dimethylallyl pyrophosphate are catalyzed by GPS enzyme (encoded by <italic>Arel.001014</italic>) to produce Geranyl diphosphate. FPS enzyme (encoded by <italic>Arel.030122</italic>) then catalyzes Geranyl diphosphate into Farnesyl pyrophosphate. SS enzyme (encoded by <italic>Arel.022914</italic>) catalyzes Farnesyl pyrophosphate into Squalene. Finally, 2,3-oxidosqualene is synthesized by the catalysis of SE enzyme (encoded by <italic>Arel.013186</italic>). In the synthetic pathway, <italic>Arel.001014</italic> and <italic>Arel.030122</italic> genes are expressed at higher levels in leaves and stems, while <italic>Arel.022914</italic> and <italic>Arel.013186</italic> genes are expressed at higher levels in stems and roots. Based on these results, it is speculated that the key triterpene skeleton biosynthesis reaction mainly occurs in the stem.</p>
<p>Finally, 2,3-oxidosqualene is catalyzed by <italic>&#x03B2;</italic>-AS (encoded by <italic>Arel.032339</italic>, <italic>Arel.032335</italic>, and <italic>Arel.020446</italic>) to form <italic>&#x03B2;</italic>-amyrin, and then oleanolic acid is formed under the catalysis of CYP716A subfamily members (including CYP716A244, CYP716A12, CYP716A179, and CYP716A94), and then Hederagenin is formed under the catalysis of CYP72A subfamily members (including CYP72A68, CYP72A68v2, and CYP72A397). Among them, genes encoding CYP72A and CYP716A subfamily members have higher expression levels in roots and stems than in leaves.</p>
<p>Based on the expression levels of these genes, we explored the secondary metabolism in <italic>A. elata</italic> plants at the spatial levels. We compared the expressional patterns of these genes in different tissues. We found that most of the genes involved in the saponin biosynthesis were specifically expressed in roots, and a few were highly expressed in leaves and stems (<xref rid="fig5" ref-type="fig">Figure 5</xref>).</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption><p>Heatmap of gene transcript abundance in the saponin biosynthetic pathway of key enzyme genes at three different parts in <italic>Aralia elata</italic>. RPKM values are log2-based. Yellow and blue indicate high and low expression levels, respectively.</p></caption>
<graphic xlink:href="fpls-13-822942-g005.tif"/>
</fig>
</sec>
<sec id="sec13">
<title>Establishment of the <italic>Agrobacterium</italic> Mediated Transformation System for <italic>Aralia elata</italic></title>
<p>Biotechnology is an efficient way to increase the contents of secondary metabolites in plants. The annotation of the <italic>A. elata</italic> genome will provide many candidate genes for the generation of genetically modified <italic>A. elata</italic> plants. However, genetic transformation of <italic>A. elata</italic> is still difficult. Based on the embryonic callus induction system of <italic>A. elata</italic> established in our laboratory, we set up the genetic transformation system of this plant. Roots of well-grown tissue culture seedlings were used as explants for <italic>A. tumefaciens</italic> infection, and the roots were precultured, co-cultured, and selection cultured (kanamycin resistance) to obtain resistant callus. DNA extracted from the resistant callus was examined by PCR. As shown in <xref rid="fig6" ref-type="fig">Figure 6</xref>, the target fragment was successfully detected in the positive transgenic plants, and transformation was found to be more efficient with an infection time of 10&#x2009;min. We transferred the transgenic callus to differentiation medium to obtain somatic embryonic seedlings. Next, the somatic embryo seedlings were transferred to WPM medium containing 20&#x2009;g/L sucrose and cultured under 16&#x2009;h light and 8&#x2009;h dark conditions for 4&#x2009;weeks, and then the plants were moved into soil and cultured in a greenhouse for 2&#x2009;months. As shown in <xref rid="fig6" ref-type="fig">Figure 6</xref>, the transgenic plants grew well, and transgenic plants were finally obtained.</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption><p>Establishment of the propagation and genetic transformation systems of <italic>Aralia elata</italic>. <bold>(A)</bold> The induction of the somatic embryogenic callus from the root in the 1/2SH medium with 3.0&#x2009;mg/L of IBA and 0.2&#x2009;mg/L of KT for 3-week culture. <bold>(B)</bold> The somatic embryogenesis from that callus in the 1/2SH medium with 1.0&#x2009;mg/L of IBA and 0.2&#x2009;mg/L of KT for 3-week culture. <bold>(C)</bold> Introduction of the exogenous genes in the plant. P, positive control; WT, wild type not transformed with <italic>Agrobacterium</italic>; 1&#x2013;5, the transgenic plant with target gene; CK<sup>&#x2212;</sup>, negative control. <bold>(D)</bold> The somatic embryo plant obtained by gene transformation.</p></caption>
<graphic xlink:href="fpls-13-822942-g006.tif"/>
</fig>
</sec>
</sec>
<sec id="sec14" sec-type="discussions">
<title>Discussion</title>
<p><italic>Aralia elata</italic> is one of the most widely used Chinese medicinal plants from the family Araliaceae and is well known in China and worldwide for its good efficacy. Triterpenoid saponins exist widely in Araliaceae and are the most studied active ingredients of <italic>A. elata</italic>. Most of the aglycones are oleanolic acid, hederagenin, and their derivatives. <xref ref-type="bibr" rid="ref004">Kochetkoy (1963)</xref> reported its chemical composition for the first time and obtained three saponins. The research on saponins of <italic>A. elata</italic> has become a hot topic, and many studies have reported its chemical components. Up to now, more than 100 saponins have been isolated and identified from <italic>A. elata</italic>. However, the complete biosynthetic pathway of saponins of <italic>A. elata</italic> has not been determined and further research is needed. Here, we briefly analyzed the terpenoid biosynthesis pathway of <italic>A. elata</italic>, to provide a reference for follow-up research. The contents of triterpenoid saponins in <italic>A. elata</italic> could be increased by genetic modification of the candidate genes involved in this pathway. The annotated genome and the genetic transformation system established in this study can be used for further functional genomic analysis in this species.</p>
<p>In the family Araliaceae, the genomes of some species including <italic>Eleutherococcus senticosus</italic> (<xref ref-type="bibr" rid="ref43">Yang et al., 2021</xref>), <italic>P. ginseng</italic> (<xref ref-type="bibr" rid="ref27">Kim et al., 2018</xref>), and <italic>P. notoginseng</italic> (<xref ref-type="bibr" rid="ref26">Jiang et al., 2021</xref>) have been reported. The high-quality genomic analysis of <italic>A. elata</italic> will provide valuable and extensive information for studying the evolutionary landscape of other species in Araliaceae. Gene mining of high-quality genomic and transcriptomic data can provide resources for further exploration of plant growth and secondary metabolism mechanisms (<xref ref-type="bibr" rid="ref39">Tu et al., 2020</xref>). So, we produced the first high-quality genome reference for <italic>A. elata</italic> with the latest sequencing technologies and bioinformatics methods. The size of the assembled genome is very close to the size predicted by <italic>K</italic>-mer analysis, indicating that no obvious expansion or collapse occurred during the assembly process. Benefiting from the long lengths and high accuracy of HiFi reads, the continuity and completeness of the <italic>A. elata</italic> genome obtained in this study are at a high-quality level. The evolutionary history of the genome was studied based on this assembly. Our results combined with the published genomes revealed the WGD trajectory in Araliaceae. A recent WGD event occurred before the divergence of species in Araliaceae.</p>
<p>In conclusion, the high-quality <italic>A. elata</italic> genome sequence described in this article, combined with comparative genome analysis, identification and tissue-specific expression analysis of putative genes involved in saponin biosynthesis, and the establishment of an efficient genetic transformation system of <italic>A. elata</italic>, will contribute to <italic>A. elata</italic> breeding and cultivation.</p>
</sec>
<sec id="sec15" sec-type="data-availability">
<title>Data Availability Statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/<xref rid="sec18" ref-type="sec"><bold>Supplementary Material</bold></xref>.</p>
</sec>
<sec id="sec16">
<title>Author Contributions</title>
<p>XY conceived the project. XY and SuC designed the experiments. WL, SoC, and WG performed most of the experiments and analyzed the data. The other authors assisted in the experiments and discussed the results. XY, SuC, WL, and WG wrote the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="sec41" sec-type="funding-information">
<title>Funding</title>
<p>This work was financially supported by the National Natural Science Foundation of China (No. 30972390) and the Fundamental Research Funds for the Central Universities (2572018CL02).</p>
</sec>
<sec id="conf1" sec-type="COI-statement">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="sec190" sec-type="disclaimer">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<ack>
<p>The authors extremely appreciate the suggestions and comments from the editors and reviewers for improving the quality of this manuscript.</p>
</ack>
<sec id="sec18" sec-type="supplementary-material">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2022.822942/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fpls.2022.822942/full#supplementary-material</ext-link>
<supplementary-material xlink:href="Table_1.DOCX" id="SM1" mimetype="application/vnd.openxmlformats" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_2.XLSX" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/></p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Ahn</surname> <given-names>D.</given-names></name></person-group> (<year>1998</year>). <source>Illustrated Book of Korean Medicinal Herbs.</source> <publisher-loc>Seoul (Korea)</publisher-loc>: <publisher-name>Kyo-hak Publishing Co.</publisher-name>, <fpage>107</fpage>.</citation></ref>
<ref id="ref2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Blanc</surname> <given-names>G.</given-names></name> <name><surname>Wolfe</surname> <given-names>K. H.</given-names></name></person-group> (<year>2004</year>). <article-title>Widespread paleopolyploidy in model plant species inferred from age distributions of duplicate genes</article-title>. <source>Plant Cell</source> <volume>16</volume>, <fpage>1667</fpage>&#x2013;<lpage>1678</lpage>. doi: <pub-id pub-id-type="doi">10.1105/tpc.021345</pub-id>, PMID: <pub-id pub-id-type="pmid">15208399</pub-id></citation></ref>
<ref id="ref3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Camacho</surname> <given-names>C.</given-names></name> <name><surname>Coulouris</surname> <given-names>G.</given-names></name> <name><surname>Avagyan</surname> <given-names>V.</given-names></name> <name><surname>Ma</surname> <given-names>N.</given-names></name> <name><surname>Papadopoulos</surname> <given-names>J.</given-names></name> <name><surname>Bealer</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>BLAST+: architecture and applications</article-title>. <source>BMC Bioinformatics</source> <volume>10</volume>:<fpage>421</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2105-10-421</pub-id>, PMID: <pub-id pub-id-type="pmid">20003500</pub-id></citation></ref>
<ref id="ref4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Carelli</surname> <given-names>M.</given-names></name> <name><surname>Biazzi</surname> <given-names>E.</given-names></name> <name><surname>Panara</surname> <given-names>F.</given-names></name> <name><surname>Tava</surname> <given-names>A.</given-names></name> <name><surname>Scaramelli</surname> <given-names>L.</given-names></name> <name><surname>Porceddu</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title><italic>Medicago truncatula</italic> CYP716A12 is a multifunctional oxidase involved in the biosynthesis of hemolytic saponins</article-title>. <source>Plant Cell</source> <volume>23</volume>, <fpage>3070</fpage>&#x2013;<lpage>3081</lpage>. doi: <pub-id pub-id-type="doi">10.1105/tpc.111.087312</pub-id>, PMID: <pub-id pub-id-type="pmid">21821776</pub-id></citation></ref>
<ref id="ref5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>C.</given-names></name> <name><surname>Chen</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Thomas</surname> <given-names>H. R.</given-names></name> <name><surname>Frank</surname> <given-names>M. H.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>TBtools: an integrative toolkit developed for interactive analyses of big biological data</article-title>. <source>Mol. Plant</source> <volume>13</volume>, <fpage>1194</fpage>&#x2013;<lpage>1202</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.molp.2020.06.009</pub-id>, PMID: <pub-id pub-id-type="pmid">32585190</pub-id></citation></ref>
<ref id="ref002"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Cheng</surname> <given-names>H.</given-names></name></person-group> (<year>2011</year>). <source>Study on Genes Related to Triterpenoid Saponin Biosynthesis Pathway in Aralia elata.</source> <publisher-loc>ChangChun, China</publisher-loc>: <publisher-name>Jilin University</publisher-name>.</citation></ref>
<ref id="ref6"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Cheng</surname> <given-names>H.</given-names></name> <name><surname>Concepcion</surname> <given-names>G.</given-names></name> <name><surname>Feng</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>H.</given-names></name></person-group> (<year>2008</year>). Haplotype-resolved de novo assembly with phased assembly graphs. arXiv. [Preprint].</citation></ref>
<ref id="ref7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cheng</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Tong</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Effects of shading on triterpene saponin accumulation and related gene expression of <italic>Aralia elata</italic> (Miq.) seem</article-title>. <source>Plant Physiol. Biochem.</source> <volume>160</volume>, <fpage>166</fpage>&#x2013;<lpage>174</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.plaphy.2021.01.009</pub-id>, PMID: <pub-id pub-id-type="pmid">33497847</pub-id></citation></ref>
<ref id="ref8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cheng</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Tong</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Identification and analysis of CYP450 and UGT supergene family members from the transcriptome of <italic>Aralia elata</italic> (Miq.) seem reveal candidate genes for triterpenoid saponin biosynthesis</article-title>. <source>BMC Plant Biol.</source> <volume>20</volume>:<fpage>214</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12870-020-02411-6</pub-id>, PMID: <pub-id pub-id-type="pmid">32404131</pub-id></citation></ref>
<ref id="ref9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Collu</surname> <given-names>G.</given-names></name> <name><surname>Unver</surname> <given-names>N.</given-names></name> <name><surname>Peltenburg-Looman</surname> <given-names>A. M.</given-names></name> <name><surname>Van Der Heijden</surname> <given-names>R.</given-names></name> <name><surname>Verpoorte</surname> <given-names>R.</given-names></name> <name><surname>Memelink</surname> <given-names>J.</given-names></name></person-group> (<year>2001</year>). <article-title>Geraniol 10-hydroxylase1, a cytochrome P450 enzyme involved in terpenoid indole alkaloid biosynthesis</article-title>. <source>FEBS Lett.</source> <volume>508</volume>, <fpage>215</fpage>&#x2013;<lpage>220</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0014-5793(01)03045-9</pub-id>, PMID: <pub-id pub-id-type="pmid">26300313</pub-id></citation></ref>
<ref id="ref10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dai</surname> <given-names>J.-L.</given-names></name> <name><surname>Tan</surname> <given-names>X.</given-names></name> <name><surname>Zhan</surname> <given-names>Y.-G.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.-Q.</given-names></name> <name><surname>Xiao</surname> <given-names>S.</given-names></name> <name><surname>Gao</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Rapid and repetitive plant regeneration of <italic>Aralia elata</italic> seem. via somatic embryogenesis</article-title>. <source>Plant Cell Tissue Organ Cult.</source> <volume>104</volume>, <fpage>125</fpage>&#x2013;<lpage>130</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11240-010-9801-x</pub-id></citation></ref>
<ref id="ref11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Darriba</surname> <given-names>D.</given-names></name> <name><surname>Posada</surname> <given-names>D.</given-names></name> <name><surname>Kozlov</surname> <given-names>A. M.</given-names></name> <name><surname>Stamatakis</surname> <given-names>A.</given-names></name> <name><surname>Morel</surname> <given-names>B.</given-names></name> <name><surname>Flouri</surname> <given-names>T.</given-names></name></person-group> (<year>2020</year>). <article-title>ModelTest-NG: a new and scalable tool for the selection of DNA and protein evolutionary models</article-title>. <source>Mol. Biol. Evol.</source> <volume>37</volume>, <fpage>291</fpage>&#x2013;<lpage>294</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msz189</pub-id>, PMID: <pub-id pub-id-type="pmid">31432070</pub-id></citation></ref>
<ref id="ref12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Bie</surname> <given-names>T.</given-names></name> <name><surname>Cristianini</surname> <given-names>N.</given-names></name> <name><surname>Demuth</surname> <given-names>J. P.</given-names></name> <name><surname>Hahn</surname> <given-names>M. W.</given-names></name></person-group> (<year>2006</year>). <article-title>CAFE: a computational tool for the study of gene family evolution</article-title>. <source>Bioinformatics</source> <volume>22</volume>, <fpage>1269</fpage>&#x2013;<lpage>1271</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btl097</pub-id>, PMID: <pub-id pub-id-type="pmid">16543274</pub-id></citation></ref>
<ref id="ref13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Di Tommaso</surname> <given-names>P.</given-names></name> <name><surname>Chatzou</surname> <given-names>M.</given-names></name> <name><surname>Floden</surname> <given-names>E. W.</given-names></name> <name><surname>Barja</surname> <given-names>P. P.</given-names></name> <name><surname>Palumbo</surname> <given-names>E.</given-names></name> <name><surname>Notredame</surname> <given-names>C.</given-names></name></person-group> (<year>2017</year>). <article-title>Nextflow enables reproducible computational workflows</article-title>. <source>Nat. Biotechnol.</source> <volume>35</volume>, <fpage>316</fpage>&#x2013;<lpage>319</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nbt.3820</pub-id>, PMID: <pub-id pub-id-type="pmid">34184051</pub-id></citation></ref>
<ref id="ref14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Duan</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Wu</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>Research progress of <italic>Aralia continentalis</italic> Kitag. of bioactive components</article-title>. <source>Farm Product. Process.</source></citation></ref>
<ref id="ref15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Emms</surname> <given-names>D. M.</given-names></name> <name><surname>Kelly</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>OrthoFinder: phylogenetic orthology inference for comparative genomics</article-title>. <source>Genome Biol.</source> <volume>20</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s13059-019-1832-y</pub-id>, PMID: <pub-id pub-id-type="pmid">31727128</pub-id></citation></ref>
<ref id="ref16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ewels</surname> <given-names>P. A.</given-names></name> <name><surname>Peltzer</surname> <given-names>A.</given-names></name> <name><surname>Fillinger</surname> <given-names>S.</given-names></name> <name><surname>Patel</surname> <given-names>H.</given-names></name> <name><surname>Alneberg</surname> <given-names>J.</given-names></name> <name><surname>Wilm</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>The nf-core framework for community-curated bioinformatics pipelines</article-title>. <source>Nat. Biotechnol.</source> <volume>38</volume>, <fpage>276</fpage>&#x2013;<lpage>278</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41587-020-0439-x</pub-id>, PMID: <pub-id pub-id-type="pmid">32055031</pub-id></citation></ref>
<ref id="ref17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fukushima</surname> <given-names>E. O.</given-names></name> <name><surname>Seki</surname> <given-names>H.</given-names></name> <name><surname>Ohyama</surname> <given-names>K.</given-names></name> <name><surname>Ono</surname> <given-names>E.</given-names></name> <name><surname>Umemoto</surname> <given-names>N.</given-names></name> <name><surname>Mizutani</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>CYP716A subfamily members are multifunctional oxidases in triterpenoid biosynthesis</article-title>. <source>Plant Cell Physiol.</source> <volume>52</volume>, <fpage>2050</fpage>&#x2013;<lpage>2061</lpage>. doi: <pub-id pub-id-type="doi">10.1093/pcp/pcr146</pub-id>, PMID: <pub-id pub-id-type="pmid">31819715</pub-id></citation></ref>
<ref id="ref18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>X.</given-names></name> <name><surname>Fang</surname> <given-names>D.</given-names></name> <name><surname>Sahu</surname> <given-names>S. K.</given-names></name> <name><surname>Yang</surname> <given-names>S.</given-names></name> <name><surname>Guang</surname> <given-names>X.</given-names></name> <name><surname>Folk</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Chloranthus genome provides insights into the early diversification of angiosperms</article-title>. <source>Nat. Commun.</source> <volume>12</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-021-26922-4</pub-id>, PMID: <pub-id pub-id-type="pmid">34836973</pub-id></citation></ref>
<ref id="ref19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haas</surname> <given-names>B. J.</given-names></name> <name><surname>Papanicolaou</surname> <given-names>A.</given-names></name> <name><surname>Yassour</surname> <given-names>M.</given-names></name> <name><surname>Grabherr</surname> <given-names>M.</given-names></name> <name><surname>Blood</surname> <given-names>P. D.</given-names></name> <name><surname>Bowden</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>De novo transcript sequence reconstruction from RNA-seq using the trinity platform for reference generation and analysis</article-title>. <source>Nat. Protoc.</source> <volume>8</volume>, <fpage>1494</fpage>&#x2013;<lpage>1512</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nprot.2013.084</pub-id>, PMID: <pub-id pub-id-type="pmid">23845962</pub-id></citation></ref>
<ref id="ref20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haas</surname> <given-names>B. J.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name> <name><surname>Zhu</surname> <given-names>W.</given-names></name> <name><surname>Pertea</surname> <given-names>M.</given-names></name> <name><surname>Allen</surname> <given-names>J. E.</given-names></name> <name><surname>Orvis</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2008</year>). <article-title>Automated eukaryotic gene structure annotation using EVidenceModeler and the program to assemble spliced alignments</article-title>. <source>Genome Biol.</source> <volume>9</volume>, <fpage>1</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.1186/gb-2008-9-1-r7</pub-id>, PMID: <pub-id pub-id-type="pmid">18190707</pub-id></citation></ref>
<ref id="ref21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Han</surname> <given-names>J.-Y.</given-names></name> <name><surname>Kim</surname> <given-names>H.-J.</given-names></name> <name><surname>Kwon</surname> <given-names>Y.-S.</given-names></name> <name><surname>Choi</surname> <given-names>Y.-E.</given-names></name></person-group> (<year>2011</year>). <article-title>The Cyt P450 enzyme CYP716A47 catalyzes the formation of protopanaxadiol from dammarenediol-II during ginsenoside biosynthesis in Panax ginseng</article-title>. <source>Plant Cell Physiol.</source> <volume>52</volume>, <fpage>2062</fpage>&#x2013;<lpage>2073</lpage>. doi: <pub-id pub-id-type="doi">10.1093/pcp/pcr150</pub-id>, PMID: <pub-id pub-id-type="pmid">22039120</pub-id></citation></ref>
<ref id="ref22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Heitz</surname> <given-names>T.</given-names></name> <name><surname>Widemann</surname> <given-names>E.</given-names></name> <name><surname>Lugan</surname> <given-names>R.</given-names></name> <name><surname>Miesch</surname> <given-names>L.</given-names></name> <name><surname>Ullmann</surname> <given-names>P.</given-names></name> <name><surname>D&#x00E9;saubry</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Cytochromes P450 CYP94C1 and CYP94B3 catalyze two successive oxidation steps of plant hormone jasmonoyl-isoleucine for catabolic turnover</article-title>. <source>J. Biol. Chem.</source> <volume>287</volume>, <fpage>6296</fpage>&#x2013;<lpage>6306</lpage>. doi: <pub-id pub-id-type="doi">10.1074/jbc.M111.316364</pub-id>, PMID: <pub-id pub-id-type="pmid">22215670</pub-id></citation></ref>
<ref id="ref23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>H&#x00F6;fer</surname> <given-names>R.</given-names></name> <name><surname>Dong</surname> <given-names>L.</given-names></name> <name><surname>Andr&#x00E9;</surname> <given-names>F.</given-names></name> <name><surname>Ginglinger</surname> <given-names>J.-F.</given-names></name> <name><surname>Lugan</surname> <given-names>R.</given-names></name> <name><surname>Gavira</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Geraniol hydroxylase and hydroxygeraniol oxidase activities of the CYP76 family of cytochrome P450 enzymes and potential for engineering the early steps of the (seco) iridoid pathway</article-title>. <source>Metab. Eng.</source> <volume>20</volume>, <fpage>221</fpage>&#x2013;<lpage>232</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ymben.2013.08.001</pub-id>, PMID: <pub-id pub-id-type="pmid">23933465</pub-id></citation></ref>
<ref id="ref24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Iorizzo</surname> <given-names>M.</given-names></name> <name><surname>Ellison</surname> <given-names>S.</given-names></name> <name><surname>Senalik</surname> <given-names>D.</given-names></name> <name><surname>Zeng</surname> <given-names>P.</given-names></name> <name><surname>Satapoomin</surname> <given-names>P.</given-names></name> <name><surname>Huang</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>A high-quality carrot genome assembly provides new insights into carotenoid accumulation and asterid genome evolution</article-title>. <source>Nat. Genet.</source> <volume>48</volume>, <fpage>657</fpage>&#x2013;<lpage>666</lpage>. doi: <pub-id pub-id-type="doi">10.1038/ng.3565</pub-id>, PMID: <pub-id pub-id-type="pmid">27158781</pub-id></citation></ref>
<ref id="ref25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Irmler</surname> <given-names>S.</given-names></name> <name><surname>Schr&#x00F6;der</surname> <given-names>G.</given-names></name> <name><surname>St-Pierre</surname> <given-names>B.</given-names></name> <name><surname>Crouch</surname> <given-names>N. P.</given-names></name> <name><surname>Hotze</surname> <given-names>M.</given-names></name> <name><surname>Schmidt</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2000</year>). <article-title>Indole alkaloid biosynthesis in <italic>Catharanthus roseus</italic>: new enzyme activities and identification of cytochrome P450 CYP72A1 as secologanin synthase</article-title>. <source>Plant J.</source> <volume>24</volume>, <fpage>797</fpage>&#x2013;<lpage>804</lpage>. doi: <pub-id pub-id-type="doi">10.1046/j.1365-313x.2000.00922.x</pub-id>, PMID: <pub-id pub-id-type="pmid">11135113</pub-id></citation></ref>
<ref id="ref26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>Z.</given-names></name> <name><surname>Tu</surname> <given-names>L.</given-names></name> <name><surname>Yang</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Hu</surname> <given-names>T.</given-names></name> <name><surname>Ma</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>The chromosome-level reference genome assembly for <italic>Panax notoginseng</italic> and insights into ginsenoside biosynthesis</article-title>. <source>Plant Commun.</source> <volume>2</volume>:<fpage>100113</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.xplc.2020.100113</pub-id>, PMID: <pub-id pub-id-type="pmid">33511345</pub-id></citation></ref>
<ref id="ref27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>N. H.</given-names></name> <name><surname>Jayakodi</surname> <given-names>M.</given-names></name> <name><surname>Lee</surname> <given-names>S. C.</given-names></name> <name><surname>Choi</surname> <given-names>B. S.</given-names></name> <name><surname>Jang</surname> <given-names>W.</given-names></name> <name><surname>Lee</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Genome and evolution of the shade-requiring medicinal herb <italic>Panax ginseng</italic></article-title>. <source>Plant Biotechnol. J.</source> <volume>16</volume>, <fpage>1904</fpage>&#x2013;<lpage>1917</lpage>. doi: <pub-id pub-id-type="doi">10.1111/pbi.12926</pub-id>, PMID: <pub-id pub-id-type="pmid">29604169</pub-id></citation></ref>
<ref id="ref004"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kochetkoy</surname> <given-names>H. K.</given-names></name></person-group> (<year>1963</year>). <article-title>Chemical constituent of <italic>Aralia elata</italic></article-title>. <source>Dokl. Akad. Nauk</source> <volume>50</volume>:<fpage>1289</fpage>.</citation></ref>
<ref id="ref28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kumar</surname> <given-names>S.</given-names></name> <name><surname>Stecher</surname> <given-names>G.</given-names></name> <name><surname>Suleski</surname> <given-names>M.</given-names></name> <name><surname>Hedges</surname> <given-names>S. B.</given-names></name></person-group> (<year>2017</year>). <article-title>TimeTree: a resource for timelines, timetrees, and divergence times</article-title>. <source>Mol. Biol. Evol.</source> <volume>34</volume>, <fpage>1812</fpage>&#x2013;<lpage>1819</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msx116</pub-id>, PMID: <pub-id pub-id-type="pmid">28387841</pub-id></citation></ref>
<ref id="ref29"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name></person-group> (<year>2013</year>). Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv [Preprint].</citation></ref>
<ref id="ref30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marchler-Bauer</surname> <given-names>A.</given-names></name> <name><surname>Lu</surname> <given-names>S.</given-names></name> <name><surname>Anderson</surname> <given-names>J. B.</given-names></name> <name><surname>Chitsaz</surname> <given-names>F.</given-names></name> <name><surname>Derbyshire</surname> <given-names>M. K.</given-names></name> <name><surname>Deweese-Scott</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>CDD: a conserved domain database for the functional annotation of proteins</article-title>. <source>Nucleic Acids Res.</source> <volume>39</volume>, <fpage>D225</fpage>&#x2013;<lpage>D229</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkq1189</pub-id>, PMID: <pub-id pub-id-type="pmid">21109532</pub-id></citation></ref>
<ref id="ref31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nafisi</surname> <given-names>M.</given-names></name> <name><surname>Goregaoker</surname> <given-names>S.</given-names></name> <name><surname>Botanga</surname> <given-names>C. J.</given-names></name> <name><surname>Glawischnig</surname> <given-names>E.</given-names></name> <name><surname>Olsen</surname> <given-names>C. E.</given-names></name> <name><surname>Halkier</surname> <given-names>B. A.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title><italic>Arabidopsis</italic> cytochrome P450 monooxygenase 71A13 catalyzes the conversion of indole-3-acetaldoxime in camalexin synthesis</article-title>. <source>Plant Cell</source> <volume>19</volume>, <fpage>2039</fpage>&#x2013;<lpage>2052</lpage>. doi: <pub-id pub-id-type="doi">10.1105/tpc.107.051383</pub-id>, PMID: <pub-id pub-id-type="pmid">17573535</pub-id></citation></ref>
<ref id="ref32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>L.-T.</given-names></name> <name><surname>Schmidt</surname> <given-names>H. A.</given-names></name> <name><surname>Von Haeseler</surname> <given-names>A.</given-names></name> <name><surname>Minh</surname> <given-names>B. Q.</given-names></name></person-group> (<year>2015</year>). <article-title>IQ-TREE: a fast and effective stochastic algorithm for estimating maximum-likelihood phylogenies</article-title>. <source>Mol. Biol. Evol.</source> <volume>32</volume>, <fpage>268</fpage>&#x2013;<lpage>274</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msu300</pub-id>, PMID: <pub-id pub-id-type="pmid">25371430</pub-id></citation></ref>
<ref id="ref33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Piegu</surname> <given-names>B.</given-names></name> <name><surname>Guyot</surname> <given-names>R.</given-names></name> <name><surname>Picault</surname> <given-names>N.</given-names></name> <name><surname>Roulin</surname> <given-names>A.</given-names></name> <name><surname>Sanyal</surname> <given-names>A.</given-names></name> <name><surname>Kim</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2006</year>). <article-title>Doubling genome size without polyploidization: dynamics of retrotransposition-driven genomic expansions in <italic>Oryza australiensis</italic>, a wild relative of rice</article-title>. <source>Genome Res.</source> <volume>16</volume>, <fpage>1262</fpage>&#x2013;<lpage>1269</lpage>. doi: <pub-id pub-id-type="doi">10.1101/gr.5290206</pub-id>, PMID: <pub-id pub-id-type="pmid">16963705</pub-id></citation></ref>
<ref id="ref34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qin</surname> <given-names>C.</given-names></name> <name><surname>Yu</surname> <given-names>C.</given-names></name> <name><surname>Shen</surname> <given-names>Y.</given-names></name> <name><surname>Fang</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>L.</given-names></name> <name><surname>Min</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Whole-genome sequencing of cultivated and wild peppers provides insights into capsicum domestication and specialization</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>111</volume>, <fpage>5135</fpage>&#x2013;<lpage>5140</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1400975111</pub-id>, PMID: <pub-id pub-id-type="pmid">24591624</pub-id></citation></ref>
<ref id="ref35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Reunov</surname> <given-names>A.</given-names></name> <name><surname>Reunova</surname> <given-names>G.</given-names></name> <name><surname>Zhuravlev</surname> <given-names>Y. N.</given-names></name></person-group> (<year>2007</year>). <article-title>Morphological study of pollen grains in mature anthers of <italic>Aralia elata</italic>, <italic>A. continentalis</italic>, and <italic>A. cordata</italic> (Araliaceae)</article-title>. <source>Dokl. Biol. Sci.</source> <volume>417</volume>, <fpage>465</fpage>&#x2013;<lpage>468</lpage>. doi: <pub-id pub-id-type="doi">10.1134/s0012496607060166</pub-id>, PMID: <pub-id pub-id-type="pmid">18274493</pub-id></citation></ref>
<ref id="ref36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Robinson</surname> <given-names>M. D.</given-names></name> <name><surname>Oshlack</surname> <given-names>A.</given-names></name></person-group> (<year>2010</year>). <article-title>A scaling normalization method for differential expression analysis of RNA-seq data</article-title>. <source>Genome Biol.</source> <volume>11</volume>:<fpage>R25</fpage>. doi: <pub-id pub-id-type="doi">10.1186/gb-2010-11-3-r25</pub-id>, PMID: <pub-id pub-id-type="pmid">20196867</pub-id></citation></ref>
<ref id="ref37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sawai</surname> <given-names>S.</given-names></name> <name><surname>Saito</surname> <given-names>K.</given-names></name></person-group> (<year>2011</year>). <article-title>Triterpenoid biosynthesis and engineering in plants</article-title>. <source>Front. Plant Sci.</source> <volume>2</volume>:<fpage>25</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpls.2011.00025</pub-id>, PMID: <pub-id pub-id-type="pmid">22639586</pub-id></citation></ref>
<ref id="ref38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sim&#x00E3;o</surname> <given-names>F. A.</given-names></name> <name><surname>Waterhouse</surname> <given-names>R. M.</given-names></name> <name><surname>Ioannidis</surname> <given-names>P.</given-names></name> <name><surname>Kriventseva</surname> <given-names>E. V.</given-names></name> <name><surname>Zdobnov</surname> <given-names>E. M.</given-names></name></person-group> (<year>2015</year>). <article-title>BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs</article-title>. <source>Bioinformatics</source> <volume>31</volume>, <fpage>3210</fpage>&#x2013;<lpage>3212</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btv351</pub-id>, PMID: <pub-id pub-id-type="pmid">26059717</pub-id></citation></ref>
<ref id="ref39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tu</surname> <given-names>L.</given-names></name> <name><surname>Su</surname> <given-names>P.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Gao</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Hu</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Genome of <italic>Tripterygium wilfordii</italic> and identification of cytochrome P450 involved in triptolide biosynthesis</article-title>. <source>Nat. Commun.</source> <volume>11</volume>:<fpage>971</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-020-14776-1</pub-id>, PMID: <pub-id pub-id-type="pmid">32080175</pub-id></citation></ref>
<ref id="ref40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Van De Peer</surname> <given-names>Y.</given-names></name> <name><surname>Maere</surname> <given-names>S.</given-names></name> <name><surname>Meyer</surname> <given-names>A.</given-names></name></person-group> (<year>2009</year>). <article-title>The evolutionary significance of ancient genome duplications</article-title>. <source>Nat. Rev. Genet.</source> <volume>10</volume>, <fpage>725</fpage>&#x2013;<lpage>732</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nrg2600</pub-id>, PMID: <pub-id pub-id-type="pmid">19652647</pub-id></citation></ref>
<ref id="ref41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vurture</surname> <given-names>G. W.</given-names></name> <name><surname>Sedlazeck</surname> <given-names>F. J.</given-names></name> <name><surname>Nattestad</surname> <given-names>M.</given-names></name> <name><surname>Underwood</surname> <given-names>C. J.</given-names></name> <name><surname>Fang</surname> <given-names>H.</given-names></name> <name><surname>Gurtowski</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>GenomeScope: fast reference-free genome profiling from short reads</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>2202</fpage>&#x2013;<lpage>2204</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btx153</pub-id>, PMID: <pub-id pub-id-type="pmid">28369201</pub-id></citation></ref>
<ref id="ref001"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Y.</given-names></name></person-group> (<year>2011</year>). <source>Cloning and Functional Analysis of Triterpenoid Saponin Related Genes in Aralia elata.</source> <publisher-loc>Changchun, China</publisher-loc>: <publisher-name>Jilin University</publisher-name>.</citation></ref>
<ref id="ref42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z.</given-names></name></person-group> (<year>2007</year>). <article-title>PAML 4: phylogenetic analysis by maximum likelihood</article-title>. <source>Mol. Biol. Evol.</source> <volume>24</volume>, <fpage>1586</fpage>&#x2013;<lpage>1591</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msm088</pub-id>, PMID: <pub-id pub-id-type="pmid">30194417</pub-id></citation></ref>
<ref id="ref43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Chen</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Hu</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>G.</given-names></name> <name><surname>Dong</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Chromosomal-scale genome assembly of <italic>Eleutherococcus senticosus</italic> provides insights into chromosome evolution in Araliaceae</article-title>. <source>Mol. Ecol. Resour.</source> <volume>21</volume>, <fpage>2204</fpage>&#x2013;<lpage>2220</lpage>. doi: <pub-id pub-id-type="doi">10.1111/1755-0998.13403</pub-id>, PMID: <pub-id pub-id-type="pmid">33891787</pub-id></citation></ref>
<ref id="ref44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>He</surname> <given-names>H.</given-names></name> <name><surname>Song</surname> <given-names>X.-Y.</given-names></name> <name><surname>Yao</surname> <given-names>G.-D.</given-names></name> <name><surname>Song</surname> <given-names>S.-J.</given-names></name></person-group> (<year>2018</year>). <article-title>Triterpene saponins with neuroprotective effects from a wild vegetable <italic>Aralia elata</italic></article-title>. <source>J. Funct. Foods</source> <volume>45</volume>, <fpage>313</fpage>&#x2013;<lpage>320</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jff.2018.04.026</pub-id></citation></ref>
<ref id="ref003"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>C. Y.</given-names></name></person-group> (<year>2012</year>). <source>Cloning and Genetic Transformation of Key Genes Related to Triterpenoid Saponin Biosynthetic Pathway in Aralia elata.</source> <publisher-loc>Changchun, China</publisher-loc>: <publisher-name>Jilin University</publisher-name>.</citation></ref></ref-list>
<fn-group><fn id="fn0005"><p><sup>1</sup><ext-link xlink:href="https://phytozome-next.jgi.doe.gov/" ext-link-type="uri">https://phytozome-next.jgi.doe.gov/</ext-link></p></fn></fn-group>
</back>
</article>