<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fgene.2020.00701</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Chromosome Level Genome Assembly of <italic>Andrographis paniculata</italic></article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Liang</surname> <given-names>Ying</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/989444/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname> <given-names>Shanshan</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/696229/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wei</surname> <given-names>Kunhua</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/849114/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname> <given-names>Zijiang</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/930787/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Duan</surname> <given-names>Shengchang</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Du</surname> <given-names>Yuan</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Qu</surname> <given-names>Peng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Miao</surname> <given-names>Jianhua</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Chen</surname> <given-names>Wei</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x002A;</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Dong</surname> <given-names>Yang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<xref ref-type="corresp" rid="c003"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/694956/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Guangxi Key Laboratory of Medicinal Resources Protection and Genetic Improvement, Guangxi Botanical Garden of Medicinal Plants</institution>, <addr-line>Nanning</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>BGI College, Zhengzhou University</institution>, <addr-line>Zhengzhou</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>National and Local Joint Engineering Research Center on Germplasm Innovation and Utilization of Chinese Medicinal Materials in Southwest China, Yunnan Agricultural University</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>NowBio Biotechnology Company</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>State Key Laboratory for Conservation and Utilization of Bio-Resources in Yunnan, Yunnan Agricultural University</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<aff id="aff6"><sup>6</sup><institution>College of Agronomy and Biotechnology, Yunnan Agricultural University</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<aff id="aff7"><sup>7</sup><institution>Yunnan Research Institute for Local Plateau Agriculture and Industry</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Xiaoming Song, North China University of Science and Technology, China</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Marcio Resende, University of Florida, United States; Ergude Bao, Beijing Jiaotong University, China</p></fn>
<corresp id="c001">&#x002A;Correspondence: Jianhua Miao, <email>mjh1962@vip.163.com</email></corresp>
<corresp id="c002">Wei Chen, <email>wchenntr@gmail.com</email></corresp>
<corresp id="c003">Yang Dong, <email>loyalyang@163.com</email></corresp>
<fn fn-type="other" id="fn002"><p><sup>&#x2020;</sup>These authors have contributed equally to this work</p></fn>
<fn fn-type="other" id="fn004"><p>This article was submitted to Plant Genomics, a section of the journal Frontiers in Genetics</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>30</day>
<month>06</month>
<year>2020</year>
</pub-date>
<pub-date pub-type="collection">
<year>2020</year>
</pub-date>
<volume>11</volume>
<elocation-id>701</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>02</month>
<year>2020</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>06</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2020 Liang, Chen, Wei, Yang, Duan, Du, Qu, Miao, Chen and Dong.</copyright-statement>
<copyright-year>2020</copyright-year>
<copyright-holder>Liang, Chen, Wei, Yang, Duan, Du, Qu, Miao, Chen and Dong</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p><italic>Andrographis paniculata</italic> (Chinese name: Chuanxinlian) is an annual dicotyledonous medicinal plant widely grown in China and Southeast Asia. The dried plant has a highly acclaimed usage in the traditional Chinese medicine for its antipyretic, anti-inflammatory, and analgesic effects. In order to help delineate the biosynthetic pathways of various secondary metabolites, we report in this study a high-quality reference genome for <italic>A. paniculata</italic>. With the help of both PacBio single molecule real time sequencing and Illumina sequencing reads for error correction, the <italic>A. paniculata</italic> genome was assembled into a total size of 284 Mb with a contig N50 size of 5.14 Mb. The contigs were further assembled into 24 pseudo-chromosomes by the Hi-C technique. We also analyzed the gene families (e.g., <italic>KSL</italic>, and <italic>CYP450</italic>) whose protein products are essential for synthesizing bioactive compounds in <italic>A. paniculata.</italic> In conclusion, the high-quality <italic>A. paniculata</italic> genome assembly builds the foundation for decoding the biosynthetic pathways of various medicinal compounds.</p>
</abstract>
<kwd-group>
<kwd>PacBio sequencing</kwd>
<kwd>Hi-C</kwd>
<kwd>genome assembly</kwd>
<kwd>medicinal plant</kwd>
<kwd><italic>Andrographis paniculata</italic></kwd>
</kwd-group>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content></contract-sponsor>
<contract-sponsor id="cn002">Agriculture Research System of China<named-content content-type="fundref-id">10.13039/501100010203</named-content></contract-sponsor>
<counts>
<fig-count count="7"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="76"/>
<page-count count="14"/>
<word-count count="0"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1">
<title>Introduction</title>
<p>During the course of human history, all civilizations have tried to explore various plants for medicinal purposes and they formed unique empirical knowledge about how these herbal plants could be used to treat various diseases. Even though much of this knowledge has gradually given way to modern medicine, WHO found that the popularity of herbal medicines increased in almost all parts of the world in recent years (<xref ref-type="bibr" rid="B21">Fitzsimons, 2013</xref>). For this reason, herbal plants not only raise the enthusiasm from the general public, but also remain a rich source for discovering novel drug candidates among the researchers.</p>
<p>With the emergence and development in high-throughput sequencing technology, the genome sequences of more than 200 plants have been reported (<xref ref-type="bibr" rid="B35">Lin et al., 2011</xref>). Particularly, genome sequencing is a powerful tool for studying various aspects of physiology and genetics in non-model plants, many of which are traditional herbal plants (<xref ref-type="bibr" rid="B71">Zerikly and Challis, 2009</xref>; <xref ref-type="bibr" rid="B13">De Luca et al., 2012</xref>; <xref ref-type="bibr" rid="B4">Chae et al., 2014</xref>). For example, the reference genome of <italic>Scutellaria baicalensis</italic> (<xref ref-type="bibr" rid="B71">Zerikly and Challis, 2009</xref>; <xref ref-type="bibr" rid="B13">De Luca et al., 2012</xref>; <xref ref-type="bibr" rid="B4">Chae et al., 2014</xref>; <xref ref-type="bibr" rid="B75">Zhao et al., 2019</xref>), <italic>Panax ginseng</italic> (<xref ref-type="bibr" rid="B65">Xu et al., 2017</xref>), mint (<xref ref-type="bibr" rid="B58">Vining et al., 2017</xref>), and opium poppy (<xref ref-type="bibr" rid="B24">Guo et al., 2018</xref>) provided insights into the genes involved in the biosynthesis of unique flavonoids, terpenes, alkaloids and many other secondary metabolites. Additionally, the reference genome of <italic>Salvia splendens</italic> was very valuable for helping marker-assisted breeding, genome editing, and molecular genetics (<xref ref-type="bibr" rid="B14">Dong et al., 2018</xref>). 
As one of the participants of the Herbal Plant Genomics Initiative, our team has reported the genomes of many Chinese herbal plants in past years, including <italic>Salvia miltiorrhiza</italic> Bunge (<xref ref-type="bibr" rid="B72">Zhang et al., 2015</xref>), <italic>Dendrobium officinale</italic> (<xref ref-type="bibr" rid="B67">Yan et al., 2015</xref>), maca (<xref ref-type="bibr" rid="B73">Zhang et al., 2016</xref>), <italic>Panax notoginseng</italic> (<xref ref-type="bibr" rid="B9">Chen et al., 2017</xref>), and fleabane (<xref ref-type="bibr" rid="B68">Yang et al., 2017</xref>). The high-quality genome assembly of <italic>Andrographis paniculata</italic> is presented in this manuscript as a continuation of this larger research project.</p>
<p><italic>Andrographis paniculata</italic> (<xref ref-type="fig" rid="F1">Figure 1A</xref>) is a dicotyledonous medicinal plant widely distributed and used in tropical and subtropical regions of Asia, including India, China, Thailand, and Malaysia (<xref ref-type="bibr" rid="B34">Lim et al., 2012</xref>). This annual plant belongs to the family of Acanthaceae in the order of Lamiales. The dried plant has a highly acclaimed usage in the traditional Chinese medicine for its antipyretic, anti-inflammatory, and analgesic effects (<xref ref-type="bibr" rid="B55">Sun et al., 2019</xref>). Previous pharmacological research identified andrographolide and neoandrographolide as the main therapeutic constituents in <italic>A. paniculata</italic> (<xref ref-type="bibr" rid="B53">Srivastava and Akhila, 2010</xref>). Andrographolide is a labdane-related diterpenoid and it exhibits anti-cancer (<xref ref-type="bibr" rid="B36">Luo et al., 2014</xref>), anti-virus (<xref ref-type="bibr" rid="B7">Chen et al., 2009</xref>), antimicrobial and anti-inflammatory activities (<xref ref-type="bibr" rid="B10">Chua, 2014</xref>), suggesting potential pharmaceutical values. The leaves of <italic>A. paniculata</italic> contain major amounts of diterpene lactone compounds, including about 0.1% of deoxyandrographolide, and about 0.2% of neoandrographolide (<xref ref-type="bibr" rid="B53">Srivastava and Akhila, 2010</xref>). Even though the biosynthesis of andrographolide and neoandrographolide is achieved by the combination of various isopentenyl diphosphate (IPP) and dimethylallyl diphosphate (DMAPP) (<xref ref-type="bibr" rid="B6">Chen et al., 2011</xref>), the complete profile of <italic>CYTOCHROME P450</italic> genes (<italic>CYPs</italic>), <italic>COPALYL DIPHOSPHATE SYNTHASE</italic> genes (<italic>CPSs</italic>), and <italic>KAURENE SYNTHASE-LIKE PROTEIN</italic> genes (<italic>KSLs</italic>) has not been fully investigated in the <italic>A. paniculata</italic> genome.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Evaluation of the genome size of <italic>A. paniculata</italic>. <bold>(A)</bold> Photo of a single <italic>A. paniculata</italic> plant with close-up image of the flower. <bold>(B)</bold> The 21<italic>-mer</italic> analysis of the <italic>A. paniculata</italic> genome using GenomeScope. <bold>(C)</bold> Flow cytometry analysis of <italic>A. paniculata</italic> genome size comparing with <italic>Zea mays</italic>.</p></caption>
<graphic xlink:href="fgene-11-00701-g001.tif"/>
</fig>
<p>The genome of <italic>A. paniculata</italic> is highly heterozygous and contains many repetitive sequences. These characteristics pose a big challenge in terms of acquiring a high-quality whole-genome reference assembly. A previous research effort reported an <italic>A. paniculata</italic> genome assembly of &#x223C;269 Mb in total size with a contig N50 of 388 Kb (<xref ref-type="bibr" rid="B55">Sun et al., 2019</xref>). These benchmarks suggest the existence of relatively large un-assembled genome and gaps among contigs. Therefore, it is valuable to improve the genome assembly of <italic>A. paniculata</italic> using better raw data and assembly pipeline. Herein, we reported a reference genome of <italic>A. paniculata</italic> obtained from the PacBio single-molecule real time sequencing data in the size of 284 Mb. The contig N50 size was improved to 5.14 Mb, which is more than 12-fold longer than before. The resultant contigs were further assembled into 24 pseudo-chromosomes by the Hi-C technology, thereby yielding a high-quality <italic>A. paniculata</italic> genome assembly.</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="S2.SS1">
<title>DNA Extraction, Library Construction, and Sequencing</title>
<p>A single <italic>A. paniculata</italic> individual was obtained from Guangxi Medicinal Botanical Garden. Plant DNA was extracted from young leaves with the Novel Plant Genomic DNA Rapid Extraction Kit (Genenode Biotech, Beijing, China) according to the product manual.</p>
<p>A total of 15 &#x03BC;g <italic>A. paniculata</italic> genomic DNA was used to construct eleven PacBio libraries (mean size of 20 kb) with the SMRT Template Prep Kit (Pacific Biosciences, United States). These libraries were sequenced on a PacBio Sequel platform with recommended protocols from the manufacturer (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>). Sequence reads with a quality score lower than 0.8 were removed.</p>
<p>In order to perform genome survey and genomic base correction, we also obtained Illumina reads for <italic>A. paniculata.</italic> In brief, 2 &#x03BC;g of genomic DNA was used to construct each library. The genomic DNA was sheared and insert sizes of 241, 313, 424, and 533 bp were selected for the four libraries. All libraries were sequenced on an Illumina HiSeq X Ten platform. The Illumina raw data were processed by fq_filter_V1.5 to remove the low-quality reads, and the parameters were set as follows: -q 33 -t 20 -ta 5 -tb 10 -tc 5 -td 10. A read was filtered out if it met any of the following criteria: (1) more than 5% of its bases were N or poly-A, (2) more than 30 of its bases were low quality, (3) it was contaminated with adaptor sequence, (4) it was too short, or (5) two copies of the paired-end reads had identical sequences (both copies were removed). The resultant reads were then corrected by the SOAPec_v2.0.1 package with default settings.</p>
</sec>
<sec id="S2.SS2">
<title>Genome Size Estimation</title>
<p>The genome size of <italic>A. paniculata</italic> was assessed by both flow cytometry and <italic>k</italic>-mer analysis. For the flow cytometry approach, 20 mg of plant tissue was placed in 1.0 ml ice-cold nuclei isolation buffer in a Petri dish. The plant tissue was minced in the buffer with a new razor blade. The homogenate was filtered through a 42-&#x03BC;m nylon mesh into a labeled sample tube. Propidium iodide was added to a final concentration of 50 &#x03BC;g/ml simultaneously with RNase (50 &#x03BC;g/ml) and the sample was incubated on ice for 20 min before measurement. <italic>Zea mays</italic> (B73) was used as the internal standard, with a reference genome size of 2.3 Gb (<xref ref-type="bibr" rid="B47">Schnable et al., 2009</xref>).</p>
<p>For the <italic>k</italic>-mer analysis, Jellyfish v.2.2.5 (<xref ref-type="bibr" rid="B37">Marcais and Kingsford, 2011</xref>) was used to perform <italic>k</italic>-mer analysis on the error-corrected Illumina sequencing data. First, FastQC software was used to assess the quality of the input Illumina sequencing data. The first 500,000,000 lines of the lane2 fastq file were extracted, and the extracted reads were used to identify heterozygosity with the &#x2018;-m 21&#x2019; option. The graph was generated by GenomeScope (<xref ref-type="bibr" rid="B59">Vurture et al., 2017</xref>).</p>
</sec>
<sec id="S2.SS3">
<title>Genome Assembly</title>
<p>A total of 53.98 Gb of PacBio data were used in the <italic>de novo</italic> assembly of the <italic>A. paniculata</italic> genome according to an assembly pipeline named HERA (<xref ref-type="bibr" rid="B15">Du and Liang, 2019</xref>). In brief, CANU v1.8 (<xref ref-type="bibr" rid="B27">Koren et al., 2017</xref>) was used to correct the raw PacBio data and assemble contigs. The resultant contigs were further improved with HERA. Next, the pipeline used the assembled genome to create an index file using bwa-mem (<xref ref-type="bibr" rid="B31">Li and Durbin, 2010</xref>). The processed next-generation sequencing data were aligned to the reference genome. Samtools (<xref ref-type="bibr" rid="B32">Li et al., 2009</xref>) was used to sort the resulting bam files. Finally, the bam files were used to polish the contig twice with Pilon (<xref ref-type="bibr" rid="B60">Walker et al., 2014</xref>). In this study, a commercial service provider from the Institute of Genetics and Developmental Biology in Beijing was recruited to assemble the <italic>A. paniculata</italic> genome using a professional version of HERA (<xref ref-type="bibr" rid="B15">Du and Liang, 2019</xref>), which is said to run faster and have better performance resolving repetitive sequences.</p>
</sec>
<sec id="S2.SS4">
<title>Hi-C Library Construction and Pseudomolecule Clustering</title>
<p>Three grams of <italic>A. paniculata</italic> leaves were harvested and crosslinked in a 2% formaldehyde solution for 15 min at room temperature. Crosslinking was quenched by adding glycine to a final concentration of 250 mM. The fixed plant tissue was then ground in liquid nitrogen and suspended in extraction buffer for nuclei isolation. After the nuclei were separated, chromatin was solubilized in 0.1% (m/v) SDS at 65&#x00B0;C for 10 min. After SDS was quenched by Triton X-100 (final concentration of 1%), solubilized chromatin was digested by 400 units of <italic>Dpn</italic>II (New England Biolabs, MA, United States) at 37&#x00B0;C overnight. The following steps included biotin labeling of the DNA and blunt-end ligation of DNA fragments. After cross-linking was reversed by the treatment with proteinase K, DNA was purified so that biotin labels could be removed from non-ligated fragment ends. DNA fragments were sonicated into sizes of 400 bp so that paired-end libraries could be obtained. These libraries were sequenced on a NovaSeq 6000 platform (Illumina, United States) to acquire the Hi-C data.</p>
<p>Low-quality Hi-C reads were removed according to the following two criteria: (1) filter a read if more than 10% of bases were N, (2) filter a read if more than 50% of bases were low quality (<italic>Q</italic> &#x2264; 5). Clean Hi-C reads were mapped to the draft assembly with Juicer (Juicer, juicer_tools.1.7.6_jcuda.0.8.jar) (<xref ref-type="bibr" rid="B17">Durand et al., 2016a</xref>). A candidate chromosome-length assembly was generated automatically using the 3d-DNA pipeline to correct misjoins and then order, orient, and anchor contigs from the draft assembly (<xref ref-type="bibr" rid="B16">Dudchenko et al., 2017</xref>). Manual review and refinement of the candidate assembly was performed in Juicebox Assembly Tools (Version 1.9.1) (<xref ref-type="bibr" rid="B18">Durand et al., 2016b</xref>) for quality control and interactive correction. The genome was then finalized using the command &#x201C;run-asm-pipeline-post-review.sh -s finalize --sort-output --build-gapped-map&#x201D; in 3d-DNA with the manually adjusted assembly as input (<xref ref-type="bibr" rid="B16">Dudchenko et al., 2017</xref>).</p>
</sec>
<sec id="S2.SS5">
<title>Genome Annotation</title>
<p>The repetitive sequences were identified via sequence alignment and <italic>de novo</italic> prediction. RepeatMasker (<xref ref-type="bibr" rid="B8">Chen, 2004</xref>) was used to compare the assembled genome with the RepBase database (Release16.10)<sup><xref ref-type="fn" rid="footnote1">1</xref></sup> using default settings (<xref ref-type="bibr" rid="B3">Bao et al., 2015</xref>). Repeatproteinmask searches<sup><xref ref-type="fn" rid="footnote2">2</xref></sup> were used for prediction of homologs using default settings. For <italic>de novo</italic> annotation of repetitive elements, LTR_finder (<xref ref-type="bibr" rid="B66">Xu and Wang, 2007</xref>)<sup><xref ref-type="fn" rid="footnote3">3</xref></sup>, Piler (<xref ref-type="bibr" rid="B20">Edgar and Myers, 2005</xref>)<sup><xref ref-type="fn" rid="footnote4">4</xref></sup>, RepeatScout (<xref ref-type="bibr" rid="B45">Price et al., 2005</xref>)<sup><xref ref-type="fn" rid="footnote5">5</xref></sup>, and RepeatModeler<sup><xref ref-type="fn" rid="footnote6">6</xref></sup> were used to construct the <italic>de novo</italic> library, and annotation was carried out with Repeatmasker (cutf 100, cpu 100, run qsub, -nolow,-no, -norna,-parallel 1). Tandem repeats were identified across the genome with Tandem Repeats Finder (cutf 100, cpu 100, period_size 2000, run qsub Match 2 Mismatch 7 Delta 7 PM 80 PI 10 Minscore 50 MaxPeriod 2000).</p>
<p>According to their characteristics and redundancy, the repeat consensus sequences were first classified using TEsort (<xref ref-type="bibr" rid="B74">Zhang et al., 2019</xref>) with the REXdb database<sup><xref ref-type="fn" rid="footnote7">7</xref></sup>. For the <italic>Copia</italic> and <italic>Gypsy</italic> superfamilies, complete elements were identified based on the presence and order of conserved domains including capsid protein, aspartic proteinase, integrase, reverse transcriptase and RNase H as described previously (<xref ref-type="bibr" rid="B64">Wicker and Keller, 2007</xref>). We extracted all reverse transcriptase (RT) sequences; multiple sequence alignment of the extracted RT sequences was then conducted with MAFFT (<xref ref-type="bibr" rid="B41">Nakamura et al., 2018</xref>), and the phylogenetic tree was constructed with IQ-TREE (<xref ref-type="bibr" rid="B26">Jana et al., 2016</xref>). iTOL<sup><xref ref-type="fn" rid="footnote8">8</xref></sup> was used for the visualization and editing of the tree. Finally, the densities of the TE consensus copies according to their lineages were computed along pseudomolecules and visualized using R.</p>
<p>GlimmerHMM (<xref ref-type="bibr" rid="B1">Aggarwal and Ramaswamy, 2002</xref>), SNAP (<xref ref-type="bibr" rid="B28">Korf, 2004</xref>), GenScan (<xref ref-type="bibr" rid="B1">Aggarwal and Ramaswamy, 2002</xref>), and Augustus (<xref ref-type="bibr" rid="B54">Stanke et al., 2006</xref>) were used for <italic>ab initio</italic> prediction of protein-coding genes with default settings. The homology-based prediction utilized reference protein sequence from <italic>Arabidopsis thaliana</italic> (<xref ref-type="bibr" rid="B38">Michael et al., 2018</xref>), <italic>Sesamum indicum</italic> (<xref ref-type="bibr" rid="B62">Wang et al., 2014</xref>), <italic>Solanum lycopersicum</italic> (<xref ref-type="bibr" rid="B11">Consortium, 2012</xref>), and <italic>Vitis vinifera</italic> (<xref ref-type="bibr" rid="B23">Girollet et al., 2019</xref>) according to an established protocol. RNA-seq data sets for <italic>A. paniculata</italic> leaf and root tissues were obtained from the National Center for Biotechnology Information (NCBI) database (SRX652837, SRX655521), and subsequently used for <italic>de novo</italic> assembly of the transcriptome. We aligned all RNA reads to the <italic>A. paniculata</italic> genome using TopHat (<xref ref-type="bibr" rid="B57">Trapnell et al., 2009</xref>), assembled the transcripts with Cufflinks (<xref ref-type="bibr" rid="B56">Trapnell et al., 2013</xref>) using default parameters, and predicted the open reading frames to obtain reliable transcripts with hidden Markov model (HMM)-based training parameters. Finally, the above 3 gene structure models were compiled by Evidence Modeler tool (<xref ref-type="bibr" rid="B25">Haas et al., 2008</xref>) with the following weights: transcripts-set &#x003E; homology-set &#x003E; <italic>ab initio</italic>-set and redundant genes were removed.</p>
<p>The tRNAscan-SE tool (<xref ref-type="bibr" rid="B5">Chan and Lowe, 2019</xref>) was used to predict tRNA in the genome sequence with the <italic>E</italic>-value set to 1e-5. Plant RNA sequences from the Rfam database were selected as references to predict the rRNA by BLASTN with the <italic>E</italic>-value set to 1e-5. The miRNA and snRNA genes were also predicted by BLASTN against the Rfam database with the <italic>E</italic>-value set to 1e-1.</p>
<p>Gene function was annotated by performing BLASTP (<italic>E</italic>-value &#x2264; 1e-5) against the protein databases. SwissProt<sup><xref ref-type="fn" rid="footnote9">9</xref></sup>, TrEMBL (see footnote 9), KEGG<sup><xref ref-type="fn" rid="footnote10">10</xref></sup>, and InterPro<sup><xref ref-type="fn" rid="footnote11">11</xref></sup> were used for screening the functional domains of the proteins. Gene Ontology (GO) terms for each gene were extracted from the corresponding InterPro entries.</p>
</sec>
<sec id="S2.SS6">
<title>Evolutionary and Phylogenetic Analyses</title>
<p>Protein sequences of <italic>A. paniculata</italic>, <italic>Sesamum indicum</italic>, <italic>Salvia miltiorrhiza</italic>, <italic>Oryza sativa</italic>, <italic>Catharanthus roseus, Arabidopsis thaliana, Solanum lycopersicum</italic>, and <italic>Helianthus annuus</italic> were downloaded from JGI. A full alignment protein search using BLASTP with the parameter <italic>E</italic>-value = 1e-5 was performed to verify the gene family clusters in these species and <italic>A. paniculata</italic>. Ortholog clustering and gene family clustering analyses were performed using OrthoMCL (<xref ref-type="bibr" rid="B33">Li et al., 2003</xref>). Venn diagram format was drawn using a web tool<sup><xref ref-type="fn" rid="footnote12">12</xref></sup> (<xref ref-type="bibr" rid="B73">Zhang et al., 2016</xref>).</p>
<p>An all-against-all BLASTP comparison with a cutoff <italic>E</italic>-value of 1e-5 was performed, and the results were clustered into groups of homologous proteins using Markov chain clustering with the default inflation parameter. All 1212 single-copy orthologous genes identified in the gene family cluster analysis from the aforementioned species were used to construct a phylogenetic tree. Multiple sequence alignments were performed for each gene using MUSCLE v.3.7<sup><xref ref-type="fn" rid="footnote13">13</xref></sup> with default settings (<xref ref-type="bibr" rid="B19">Edgar, 2004</xref>).</p>
<p>The MCMCTREE program within the PAML package (<xref ref-type="bibr" rid="B69">Yang, 2007</xref>) was used to estimate the divergence times of <italic>A. paniculata</italic>, <italic>S. indicum</italic>, <italic>S. miltiorrhiza</italic>, <italic>O. sativa</italic>, <italic>C. roseus, A. thaliana, S. lycopersicum</italic>, and <italic>H. annuus</italic>. The HKY85 model (model = 4) and independent rates molecular clock (clock = 2) were used for calculation.</p>
<p>CAFE v1.7 (<xref ref-type="bibr" rid="B12">De Bie et al., 2006</xref>) is a tool for analyzing the evolution of gene family size based on the stochastic birth and death model. With the calculated phylogeny and the divergence time, this software was applied to identify gene families that had undergone expansion and/or contraction in the aforementioned species with the parameters: -filter -cpu 10 -lrt -simunum 1000.</p>
</sec>
<sec id="S2.SS7">
<title>Synteny Analysis of Two Genome Assemblies</title>
<p>The fasta and hic.gff files of the published genome assembly (<xref ref-type="bibr" rid="B55">Sun et al., 2019</xref>) were downloaded from NCBI. They were combined with the fasta and contig.gff files of our genome assembly by makeblastdb. BLASTP was used to align these sequences, and MCScanX (<xref ref-type="bibr" rid="B63">Wang et al., 2012</xref>) was used to perform synteny analysis between the two genome assemblies.</p>
</sec>
<sec id="S2.SS8">
<title>Analysis of Key Gene Families in the <italic>A. paniculata</italic> Genome</title>
<p>We used hmmsearch to perform a preliminary screening of the gene families (<italic>CYP450</italic> and <italic>terpene synthases, TPSs</italic>) in <italic>A. paniculata</italic>, and the IDs of genes with an <italic>E</italic>-value &#x2264; 1e-59 were extracted. The corresponding protein sequences were used as queries for TBLASTN (<italic>E</italic> = 1e-5) with both versions of the assembled <italic>A. paniculata</italic> genome sequence. The <italic>CYP450</italic> genes from other species were downloaded from the Cytochrome P450 Homepage<sup><xref ref-type="fn" rid="footnote14">14</xref></sup> (<xref ref-type="bibr" rid="B42">Nelson, 2009</xref>) and the <italic>TPS</italic> genes from other species were acquired from a previous publication (<xref ref-type="bibr" rid="B6">Chen et al., 2011</xref>). Multiple sequence alignment was carried out with MUSCLE v3.7 (<xref ref-type="bibr" rid="B19">Edgar, 2004</xref>) using default parameters. The maximum likelihood (ML) phylogenetic tree was constructed using MEGA7 (<xref ref-type="bibr" rid="B42">Nelson, 2009</xref>) with 1,000 bootstrap replicates.</p>
<p>The RNA-Seq data of the roots and leaves of <italic>A. paniculata</italic> were downloaded from the NCBI database (SRX652837, SRX655521). FPKM value was calculated for each protein-coding gene by Cufflinks (v. 2.1.1) (<xref ref-type="bibr" rid="B56">Trapnell et al., 2013</xref>). The heatmap was made with the pheatmap package.</p>
</sec>
</sec>
<sec id="S3">
<title>Results</title>
<sec id="S3.SS1">
<title>Genomic Sequencing and High-Quality Genome Assembly</title>
<p>Genome survey with Illumina reads showed that the estimated genome size of <italic>A. paniculata</italic> with <italic>21-mer</italic> analysis was about 295 Mb (<xref ref-type="fig" rid="F1">Figure 1B</xref>). This number was slightly smaller than the estimated genome size of 310 Mb from flow cytometry analysis (<xref ref-type="fig" rid="F1">Figure 1C</xref>), but larger than the previous estimate of 280 Mb (<xref ref-type="bibr" rid="B55">Sun et al., 2019</xref>). The difference probably reflects the between-individual variation of the <italic>A. paniculata</italic> plant. Moreover, the heterozygosity of this sequenced genome was estimated to be 0.175% (<xref ref-type="fig" rid="F1">Figure 1B</xref>).</p>
<p>In order to obtain a high-quality <italic>A. paniculata</italic> genome assembly, we constructed 11 PacBio SMRT sequencing libraries, which produced 53.96 Gb clean data (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>). They covered about 183-fold of the estimated genome. We also generated about 238.6 Gb of Illumina sequencing data to polish and correct the error reads that are associated with PacBio sequencing. A combination of these data through an assembly pipeline (see the section on genome assembly methods for details) yielded a draft genome assembly of &#x223C;284.3 Mb with 270 contigs (<xref ref-type="table" rid="T1">Table 1</xref>). This assembly represented about 91.6 &#x2013; 96.3% of the estimated genome size. The longest contig was 9.30 Mb in size and the contig N50 was about 5.15 Mb. This benchmark is more than 12-fold longer than that of a previously reported assembly (<xref ref-type="bibr" rid="B55">Sun et al., 2019</xref>).</p>
<table-wrap position="float" id="T1">
<label>TABLE 1</label>
<caption><p>The library information and data statistics for the <italic>A. paniculata</italic> assembly.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left"><bold>Estimated genome size (Mb)</bold></td>
<td valign="top" align="center"><bold>295 - 310</bold></td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><bold>Assembly statistics</bold></td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Assembly size (Mb)</td>
<td valign="top" align="center">284.3</td>
</tr>
<tr>
<td valign="top" align="left">Number of N50 contigs</td>
<td valign="top" align="center">22</td>
</tr>
<tr>
<td valign="top" align="left">N50 contig length (bp)</td>
<td valign="top" align="center">5,149,272</td>
</tr>
<tr>
<td valign="top" align="left">Number of N90 contigs</td>
<td valign="top" align="center">51</td>
</tr>
<tr>
<td valign="top" align="left">N90 contig length (bp)</td>
<td valign="top" align="center">2,610,185</td>
</tr>
<tr>
<td valign="top" align="left">Transposable elements content</td>
<td valign="top" align="center">57.35%</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Gene annotation statistics</bold></td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Total number of protein-coding genes</td>
<td valign="top" align="center">24,015</td>
</tr>
<tr>
<td valign="top" align="left">Total exon number</td>
<td valign="top" align="center">136,156</td>
</tr>
<tr>
<td valign="top" align="left">Average exon number per gene</td>
<td valign="top" align="center">5.67</td>
</tr>
<tr>
<td valign="top" align="left">Average exon size (bp)</td>
<td valign="top" align="center">227.14</td>
</tr>
<tr>
<td valign="top" align="left">Total intron length (bp)</td>
<td valign="top" align="center">453,323</td>
</tr>
<tr>
<td valign="top" align="left">Total number of non-protein-coding genes</td>
<td valign="top" align="center">6,591</td>
</tr>
</tbody>
</table></table-wrap>
<p>We assessed the completeness of the genome assembly of <italic>A. paniculata</italic> by using the Benchmarking Universal Single-Copy Orthologs (BUSCO) approach (<xref ref-type="bibr" rid="B52">Simao et al., 2015</xref>). The result showed that 91.0% of the plant BUSCO genes could be recovered in the genome assembly and 3% of the plant BUSCO genes had partial matches (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 2</xref>). We also mapped cleaned Illumina paired reads back to the genome assembly using BWA mem (<xref ref-type="bibr" rid="B31">Li and Durbin, 2010</xref>). We found that 97% of the reads could be mapped to the genome assembly and 94% of the reads were found to be properly paired. The high mapping rate indicates high completeness of the assembly. These two benchmarks were also higher than those reported in the previous assembly (<xref ref-type="bibr" rid="B55">Sun et al., 2019</xref>). In addition, our contig assembly shared high collinearity with its counterpart from a previous report (<xref ref-type="fig" rid="F2">Figure 2</xref>). These data collectively suggest that the genome assembly of <italic>A. paniculata</italic> in this study has high quality and could be used for subsequent analyses.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p>Collinear analysis of the assemblies of <italic>A. paniculata</italic> genome.</p></caption>
<graphic xlink:href="fgene-11-00701-g002.tif"/>
</fig>
<p>Finally, we obtained about 43.33 Gb (&#x223C;152 &#x00D7; coverage) clean Hi-C sequencing data, with which 246,855,874 bp (86.8% of all bases) of the genome assembly were organized into 24 pseudo-chromosomes (<xref ref-type="fig" rid="F3">Figure 3</xref> and <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 3</xref>). As expected, the Hi-C interaction decreases as the physical distance between two sequences increases.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p>Hi-C clustering heatmap for pseudomolecule construction in <italic>A. paniculata</italic>. In all, 24 pseudomolecules are assembled and indicated by blue boxes.</p></caption>
<graphic xlink:href="fgene-11-00701-g003.tif"/>
</fig>
</sec>
<sec id="S3.SS2">
<title>Repeat Annotation</title>
<p>Transposable elements (TEs) accounted for about 163.1 Mb or 57.35% of the <italic>A. paniculata</italic> genome (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 4</xref>). Breakdown of the TE statistics showed that DNA transposons and long terminal repeat (LTR) retrotransposons were major subtypes in the <italic>A. paniculata</italic> genome (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 5</xref>). TEs constitute an important part of plant genomes. We analyzed the evolutionary history of various <italic>Ty3-gypsies</italic> and <italic>Ty1-copia</italic> retrotransposon elements in the <italic>A. paniculata</italic> genome and identified unique dynamics of invasion patterns for different TE lineages (<xref ref-type="fig" rid="F4">Figures 4A,B</xref>). For example, Ivana, Ogre, and Ale elements are all relatively young, suggesting an intense and recent burst of insertion or a strong selection against these TE elements. In contrast, Angela elements are the most ancient ones and the bimodal distribution suggests that the burst of insertion occurred twice in the evolutionary history of <italic>A. paniculata</italic> (<xref ref-type="fig" rid="F4">Figure 4B</xref>). In addition, TE family distribution varied across the genome (<xref ref-type="fig" rid="F4">Figure 4C</xref>). Ogre elements tend to cluster closely, thereby yielding prominent hotspot regions in the genome of <italic>A. paniculata</italic>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p>Repeat analysis <bold>(A)</bold> Neighbor-joining (NJ) trees were built from RT domain sequence similarities among different lineage-specific copies identified in the <italic>A. paniculata</italic> genome. Deep branching revealed ancient expansion while flat branching is consistent with a recent burst of insertion activity. Red branches correspond to outgroup sequences. <bold>(B)</bold> The average age of TEs was revealed for the different lineages by the branching distribution in the NJ trees built from RT (light blue). <bold>(C)</bold> The density of different TE lineages inferred from the detection of their protein-coding domains along pseudomolecules.</p></caption>
<graphic xlink:href="fgene-11-00701-g004.tif"/>
</fig>
</sec>
<sec id="S3.SS3">
<title>Protein-Coding Gene Annotation</title>
<p>A combination of <italic>ab initio</italic> based, homology based, and RNA-Seq based methods were used to predict 24,015 protein-coding genes in the <italic>A. paniculata</italic> genome (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 6</xref>). The predicted mRNA was on average 3,175 bp in length, containing about 5.67 exons with an average CDS length of 1,287 bp. The number of predicted protein-coding genes was comparable to that of <italic>S. indicum</italic>, but much smaller than that of <italic>O. sativa</italic> and <italic>H. annuus</italic> (<xref ref-type="fig" rid="F5">Figure 5A</xref>). Orthologous clustering analysis showed that the <italic>A. paniculata</italic> genome contained 4,449 single-copy orthologs, 6,731 multiple-copy orthologs, 1,234 unique paralogs, 7,165 other orthologs, and 4,436 unclustered genes (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 7</xref>). In addition, Venn diagram showed that 7,798 gene families were shared by <italic>A. paniculata</italic>, <italic>S. indicum, S. miltiorrhiza</italic>, and <italic>O. sativa</italic>. A total of 525 gene families were unique to <italic>A. paniculata</italic>. This number was lower than that of the other three plant species (<xref ref-type="fig" rid="F5">Figure 5B</xref>). Among the 24,015 protein-coding genes, a total of 19,824 predicted genes are supported by the RNA-seq expression data (FPKM &#x003E; 0.05). Functional annotation of predicted protein-coding genes showed that 91.5% could obtain TrEMBL annotation; 62.2% could obtain GO annotation; 77.2% could obtain KEGG annotation; and 81.0% could obtain InterPro annotation (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 8</xref>).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption><p>Comparative analyses of the <italic>A. paniculata</italic> genome. <bold>(A)</bold> Major groups of orthologous genes in eight plant genomes. <bold>(B)</bold> Venn diagram of shared orthologous gene families among <italic>A. paniculata</italic>, <italic>S. indicum</italic>, <italic>S. miltiorrhiza</italic>, and <italic>O. sativa</italic> genomes. <bold>(C)</bold> Estimation of the time points of divergence (time range shown in parentheses) between <italic>A. paniculata</italic> and seven other plant species based on orthologous single-copy gene pairs. <bold>(D)</bold> Expansion and contraction of gene families in eight plant genomes. Pie diagram on each branch and node corresponds to combined change across lineages.</p></caption>
<graphic xlink:href="fgene-11-00701-g005.tif"/>
</fig>
</sec>
<sec id="S3.SS4">
<title>Non-protein-Coding Gene Annotation</title>
<p>A combination of homolog and <italic>ab initio</italic> based methods identified a total of 6,591 non-protein-coding genes (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 9</xref>). These predicted genes comprised 93 microRNA genes, 524 transfer RNA (tRNA) genes, 5,785 ribosomal RNA (rRNA) genes, and 189 small nuclear RNA genes (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 9</xref>).</p>
</sec>
<sec id="S3.SS5">
<title>Phylogenetic Analysis of <italic>A. paniculata</italic></title>
<p>Phylogenetic analysis of <italic>A. paniculata</italic> and seven other plant species showed that <italic>A. paniculata</italic> shared a common ancestor with <italic>S. miltiorrhiza</italic> approximately 57.0 Myr ago, as calculated by r8s (<xref ref-type="fig" rid="F5">Figure 5C</xref>). This estimate corresponds to the report from a previous study (<xref ref-type="bibr" rid="B55">Sun et al., 2019</xref>). During the course of evolution, a total of 1,064 and 1,396 gene families in <italic>A. paniculata</italic> were found to undergo expansion and contraction, respectively (<xref ref-type="fig" rid="F5">Figure 5D</xref>).</p>
</sec>
<sec id="S3.SS6">
<title>Genomic Analysis of Key Genes in the Terpenoid Biosynthetic Pathway</title>
<p>Terpenoids are the largest group of plant secondary metabolites that are key targets for pharmaceutical screening and design (<xref ref-type="bibr" rid="B53">Srivastava and Akhila, 2010</xref>). Despite the structural diversity, these compounds share a common biosynthetic pathway (<xref ref-type="bibr" rid="B70">Yazaki et al., 2017</xref>). Terpenoids are derived from two five-carbon chemicals: IPP and dimethylallyl diphosphate (DMAPP) (<xref ref-type="bibr" rid="B30">Lange et al., 2001</xref>). Their biosynthesis involves the classical acetate/mevalonate pathway in the cytosol and the pyruvate/glyceraldehyde-3-phosphate pathway in the plastids (<xref ref-type="fig" rid="F6">Figure 6A</xref>) (<xref ref-type="bibr" rid="B6">Chen et al., 2011</xref>). Eventually, the condensation of IPP and DMAPP in various combinations will give rise to the countless terpenoids in plants (<xref ref-type="bibr" rid="B53">Srivastava and Akhila, 2010</xref>). In the <italic>A. paniculata</italic> genome, according to the KEGG annotation results, we identified 41 putative genes that were involved in the terpenoid backbone biosynthesis (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 10</xref>). This number is larger than that found in the <italic>Panax notoginseng</italic> genome (<xref ref-type="bibr" rid="B9">Chen et al., 2017</xref>). Additionally, almost all these putative genes exhibited certain levels of expression in the leaf and root tissue of <italic>A. paniculata</italic>. A previous report showed that the <italic>GERANYLGERANYL PYROPHOSPHATE SYNTHASE</italic> (<italic>GGPPS</italic>) group of genes in <italic>A. paniculata</italic> might be involved in the biosynthesis of andrographolide (<xref ref-type="bibr" rid="B61">Wang et al., 2019</xref>). 
In our genome assembly, a total number of 13 putative <italic>GGPPS</italic> genes were identified, and 11 out of 13 putative genes were expressed (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 10</xref>).</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption><p>Genomic analysis of putative <italic>TPS</italic> genes in the <italic>A. paniculata</italic> genome. <bold>(A)</bold> Schematic biosynthetic pathway of andrographolide and neoandrographolide. <bold>(B)</bold> Phylogenetic analysis of <italic>TPS</italic> genes in <italic>A. paniculata</italic> and other plants. Red branches indicate putative <italic>TPS</italic> genes in <italic>A. paniculata</italic>. <bold>(C)</bold> Expression levels of putative <italic>KS/KSL</italic> genes in the leaf and root tissue of <italic>A. paniculata</italic>. <bold>(D)</bold> Expression levels of putative <italic>CPS</italic> genes in the leaf and root tissue of <italic>A. paniculata</italic>.</p></caption>
<graphic xlink:href="fgene-11-00701-g006.tif"/>
</fig>
<p>Besides <italic>GGPPS</italic> genes, previous studies suggest that <italic>COPALYL DIPHOSPHATE SYNTHASE</italic> (<italic>CPS</italic>) genes are implicated in the biosynthetic pathway of andrographolide and neoandrographolide (<xref ref-type="bibr" rid="B22">Garg et al., 2015</xref>; <xref ref-type="bibr" rid="B49">Shen et al., 2016a</xref>, <xref ref-type="bibr" rid="B50">b</xref>). The CPS enzymes are very similar in protein sequence to kaurene synthase (KS) and kaurene synthase-like (KSL) proteins (<xref ref-type="bibr" rid="B76">Zi et al., 2014</xref>). Despite their distinct catalytic activity, CPSs and KSLs belong to the c-type and e-type subfamilies of the terpene synthase (TPS) enzymes, respectively (<xref ref-type="bibr" rid="B6">Chen et al., 2011</xref>). In the <italic>A. paniculata</italic> genome, we identified a total of 53 putative <italic>TPS</italic> genes (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 11</xref>). Phylogenetic analysis of the TPS protein sequences from <italic>A. paniculata</italic> and eight other species showed that there were 24 putative <italic>TPS-a1</italic> genes, 10 putative <italic>TPS-b</italic> genes, 4 putative <italic>TPS-c</italic> genes, 8 putative <italic>TPS-e</italic> genes, 5 putative <italic>TPS-f</italic> genes, and 2 putative <italic>TPS-g</italic> genes (<xref ref-type="fig" rid="F6">Figure 6B</xref>). This suggests that the <italic>A. paniculata</italic> genome may have four <italic>CPS</italic> genes and eight <italic>KS</italic>/<italic>KSL</italic> genes, all of which show certain levels of expression in the leaf and root tissues of the plant (<xref ref-type="fig" rid="F6">Figures 6C,D</xref>).</p>
</sec>
<sec id="S3.SS7">
<title>Genomic Analysis of the Cytochrome P450 Gene Family in <italic>A. paniculata</italic></title>
<p>Various biosynthetic pathways in plants rely on members of the <italic>CYTOCHROME P450</italic> (<italic>CYP450</italic>) gene family to accomplish chemical modification (<xref ref-type="bibr" rid="B48">Schuler, 1996</xref>; <xref ref-type="bibr" rid="B39">Mizutani and Ohta, 1998</xref>; <xref ref-type="bibr" rid="B40">Morant et al., 2003</xref>). For this reason, we investigated the putative <italic>CYP450</italic> genes in <italic>A. paniculata</italic>, and found a total of 205 candidates (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 12</xref>). The phylogenetic tree of the putative CYP450 protein sequences exhibited nine major clans (<xref ref-type="fig" rid="F7">Figure 7A</xref>). The majority of candidate genes belonged to the clan 71 (104 genes), clan 72 (33 genes), clan 85 (33 genes), and clan 86 (25 genes), respectively. We also found that some <italic>CYP450</italic> genes appeared in a cluster in the contigs of the assembly (<xref ref-type="fig" rid="F7">Figure 7B</xref>). This result agrees with the findings that it is fairly common to have gene clusters for specific biosynthetic pathways in plant genomes (<xref ref-type="bibr" rid="B43">Nutzmann and Osbourn, 2014</xref>). Among the 205 candidate genes, we identified 111 putative <italic>CYP450</italic> genes that were differentially expressed in the root and leaf of <italic>A. paniculata</italic> (<xref ref-type="fig" rid="F7">Figure 7C</xref>). Additionally, the expression level of evm.model.49.509 in the <italic>CYP450</italic> family was the highest in the root of <italic>A. paniculata</italic>, followed by evm.model.49.208, evm.model.7.226, and evm.model.54.219. In comparison, evm.model.19.136 was the most highly expressed gene in the leaves of <italic>A. paniculata</italic>, followed by evm.model.54.197 and evm.model.11.4.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption><p>Genomic analysis of putative <italic>CYP450</italic> genes in the <italic>A. paniculata</italic> genome. <bold>(A)</bold> Phylogenetic analysis of <italic>CYP450</italic> genes in <italic>A. paniculata</italic> and other plants. Red branches indicate putative <italic>CYP450</italic> genes in <italic>A. paniculata</italic>. <bold>(B)</bold> The <italic>CYP450</italic> gene clusters on the contigs. <bold>(C)</bold> Differentially expressed putative <italic>CYP450</italic> genes in <italic>A. paniculata</italic>.</p></caption>
<graphic xlink:href="fgene-11-00701-g007.tif"/>
</fig>
</sec>
</sec>
<sec id="S4">
<title>Discussion</title>
<p>With the advancement and cost reduction of genome sequencing technologies, more and more plant species have revealed their near-complete genetic composition at a single-base resolution. Because most medicinal plants have highly repetitive and/or heterozygous genomes, a high-quality draft assembly is usually difficult and costly to secure. Particularly, lower contiguity of the genome assembly at the contig level will impede genomic analysis of functional genes, delineation of biosynthetic pathways, and the development of novel pharmaceutical candidates.</p>
<p>We reported a chromosome-level reference genome for <italic>A. paniculata</italic> with improved benchmark values. Our updated <italic>A. paniculata</italic> genome was 284 Mb with a contig N50 size of 5.15 Mb. This number was more than 12-fold longer than that of the previous report (<xref ref-type="bibr" rid="B55">Sun et al., 2019</xref>). The BUSCO value reached 91.7%, which demonstrated the high completeness of the genome assembly.</p>
<p>Repetitive elements account for a large percentage of plant genomes. For example, the genome of lettuce contains about 74.2% of the sequence as repeats and the genome of sunflower includes more than 75% LTRs (<xref ref-type="bibr" rid="B2">Badouin et al., 2017</xref>; <xref ref-type="bibr" rid="B46">Reyes-Chin-Wo et al., 2017</xref>). We analyzed the repetitive element sequences in the <italic>A. paniculata</italic> genome and found that the repeat content of the <italic>A. paniculata</italic> genome is as high as 57%, among which long terminal repeat retrotransposons (LTR-RTs) are predominant. Phylogenetic analyses of various <italic>copia</italic> and <italic>gypsies</italic> RT subclasses showed that the bursts of invasion of many RTs were recent events, except for Angela elements. This is in contrast to the pea genome where Angela elements drive the invasion fairly recently (<xref ref-type="bibr" rid="B29">Kreplak et al., 2019</xref>). Given these results, it is worth noting that TE annotation remains a challenging task for plant genomes. For instance, about 21.8% of <italic>de novo</italic> identified TEs in the <italic>A. paniculata</italic> genome could not be assigned to a particular category. Moreover, TEs tend to insert into the structures of existing TE elements, creating nested TEs in the genome. With our data, it would be interesting to use newly developed pipelines (e.g., Extensive <italic>de novo</italic> TE Annotator) (<xref ref-type="bibr" rid="B44">Ou et al., 2019</xref>) to deconvolute nested TEs in the genome of <italic>A. paniculata.</italic></p>
<p>In the present study, we analyzed the <italic>CYP450</italic> gene family in <italic>A. paniculata</italic>, and identified 205 putative <italic>CYP450</italic> genes with conserved motifs. The results showed that all major classes of <italic>CYP450</italic> reported by <xref ref-type="bibr" rid="B43">Nutzmann and Osbourn (2014)</xref> could be found in the <italic>A. paniculata</italic> genome. The number of <italic>CYP450</italic> genes with high expression is larger in roots than in leaves. Furthermore, terpene synthases (TPSs) are one of the main drivers of terpene diversification. The terpene synthase (TPS) family genes are generally categorized into seven clades (<xref ref-type="bibr" rid="B51">Shen et al., 2018</xref>). In the <italic>A. paniculata</italic> genome, we identified a total of 53 putative <italic>TPS</italic> genes, most of which belong to the <italic>TPS-a</italic> and <italic>TPS-b</italic> subfamilies. This is in line with the fact that <italic>TPS-a</italic> and <italic>TPS-b</italic> subfamilies represent the angiosperm-specific genes that have diverged from the other <italic>TPS</italic> genes (<xref ref-type="bibr" rid="B51">Shen et al., 2018</xref>).</p>
</sec>
<sec id="S5">
<title>Conclusion</title>
<p>The high-quality genome of <italic>A. paniculata</italic> will not only lay out the foundation for investigating the genetic basis for secondary metabolite biosynthesis, but also serve as an important resource for the study of other plant species in the <italic>Andrographis</italic> genus.</p>
</sec>
<sec id="S6">
<title>Data Availability Statement</title>
<p>All raw sequence reads and the genome assembly have been deposited at NCBI under the BioProject accession number <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA549104">PRJNA549104</ext-link>.</p>
</sec>
<sec id="S7">
<title>Author Contributions</title>
<p>YL, KW, and PQ collected the samples and performed the experiments. SC, ZY, YDu, and SD completed the data analysis. YDo, WC, and JM edited and modified the manuscript. All authors read and approved the manuscript.</p>
</sec>
<sec id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</body>
<back>
<fn-group>
<fn fn-type="financial-disclosure">
<p><bold>Funding.</bold> This study was supported by the National Key R&#x0026;D Program of China (2019YFC1711100), Yunnan Provincial Key Programs of Yunnan Eco-friendly Food International Cooperation Research Center Project (2019ZG00908), the Guangxi Innovation-Driven Development Project (GuiKe AA18242040), National Natural Science Foundation of China (81460582 and 81473309), China Agriculture Research System (CARS-21), and Guangxi Science and Technology Project (GuiKe AD17129044).</p>
</fn>
</fn-group>
<sec id="S9" sec-type="supplementary material">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2020.00701/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2020.00701/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table_1.xlsx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"></supplementary-material>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Aggarwal</surname> <given-names>G.</given-names></name> <name><surname>Ramaswamy</surname> <given-names>R.</given-names></name></person-group> (<year>2002</year>). <article-title><italic>Ab initio</italic> gene identification: prokaryote genome annotation with GeneScan and GLIMMER.</article-title> <source><italic>J. Biosci.</italic></source> <volume>27</volume> <fpage>7</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1007/BF02703679</pub-id> <pub-id pub-id-type="pmid">11927773</pub-id></citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Badouin</surname> <given-names>H.</given-names></name> <name><surname>Gouzy</surname> <given-names>J.</given-names></name> <name><surname>Grassa</surname> <given-names>C. J.</given-names></name> <name><surname>Murat</surname> <given-names>F.</given-names></name> <name><surname>Staton</surname> <given-names>S. E.</given-names></name> <name><surname>Cottret</surname> <given-names>L.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>The sunflower genome provides insights into oil metabolism, flowering and asterid evolution.</article-title> <source><italic>Nature</italic></source> <volume>546</volume> <fpage>148</fpage>&#x2013;<lpage>152</lpage>. <pub-id pub-id-type="doi">10.1038/nature22380</pub-id> <pub-id pub-id-type="pmid">28538728</pub-id></citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bao</surname> <given-names>W.</given-names></name> <name><surname>Kojima</surname> <given-names>K. K.</given-names></name> <name><surname>Kohany</surname> <given-names>O.</given-names></name></person-group> (<year>2015</year>). <article-title>Repbase update, a database of repetitive elements in eukaryotic genomes.</article-title> <source><italic>Mob. DNA</italic></source> <volume>6</volume>:<issue>11</issue>. <pub-id pub-id-type="doi">10.1186/s13100-015-0041-9</pub-id> <pub-id pub-id-type="pmid">26045719</pub-id></citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chae</surname> <given-names>L.</given-names></name> <name><surname>Kim</surname> <given-names>T.</given-names></name> <name><surname>Nilo-Poyanco</surname> <given-names>R.</given-names></name> <name><surname>Rhee</surname> <given-names>S. Y.</given-names></name></person-group> (<year>2014</year>). <article-title>Genomic signatures of specialized metabolism in plants.</article-title> <source><italic>Science</italic></source> <volume>344</volume> <fpage>510</fpage>&#x2013;<lpage>513</lpage>. <pub-id pub-id-type="doi">10.1126/science.1252076</pub-id> <pub-id pub-id-type="pmid">24786077</pub-id></citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chan</surname> <given-names>P. P.</given-names></name> <name><surname>Lowe</surname> <given-names>T. M.</given-names></name></person-group> (<year>2019</year>). <article-title>tRNAscan-SE: searching for tRNA genes in genomic sequences.</article-title> <source><italic>Methods Mol. Biol.</italic></source> <volume>1962</volume> <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1007/978-1-4939-9173-0_1</pub-id></citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>F.</given-names></name> <name><surname>Tholl</surname> <given-names>D.</given-names></name> <name><surname>Bohlmann</surname> <given-names>J.</given-names></name> <name><surname>Pichersky</surname> <given-names>E.</given-names></name></person-group> (<year>2011</year>). <article-title>The family of terpene synthases in plants: a mid-size family of genes for specialized metabolism that is highly diversified throughout the kingdom.</article-title> <source><italic>Plant J.</italic></source> <volume>66</volume> <fpage>212</fpage>&#x2013;<lpage>229</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-313X.2011.04520.x</pub-id> <pub-id pub-id-type="pmid">21443633</pub-id></citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J. X.</given-names></name> <name><surname>Xue</surname> <given-names>H. J.</given-names></name> <name><surname>Ye</surname> <given-names>W. C.</given-names></name> <name><surname>Fang</surname> <given-names>B. H.</given-names></name> <name><surname>Liu</surname> <given-names>Y. H.</given-names></name> <name><surname>Yuan</surname> <given-names>S.-H.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>Activity of andrographolide and its derivatives against influenza virus in vivo and in vitro.</article-title> <source><italic>Biol. Pharm. Bull.</italic></source> <volume>32</volume> <fpage>1385</fpage>&#x2013;<lpage>1391</lpage>. <pub-id pub-id-type="doi">10.1248/bpb.32.1385</pub-id> <pub-id pub-id-type="pmid">19652378</pub-id></citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>N.</given-names></name></person-group> (<year>2004</year>). <article-title>Using RepeatMasker to identify repetitive elements in genomic sequences.</article-title> <source><italic>Curr. Protoc. Bioinformatics</italic></source> <volume>25</volume> <fpage>4.10.1</fpage>&#x2013;<lpage>4.10.14</lpage>. <pub-id pub-id-type="doi">10.1002/0471250953.bi0410s05</pub-id> <pub-id pub-id-type="pmid">18428725</pub-id></citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>W.</given-names></name> <name><surname>Kui</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>G.</given-names></name> <name><surname>Zhu</surname> <given-names>S.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Whole-genome sequencing and analysis of the Chinese herbal plant <italic>Panax notoginseng</italic>.</article-title> <source><italic>Mol. Plant</italic></source> <volume>10</volume> <fpage>899</fpage>&#x2013;<lpage>902</lpage>. <pub-id pub-id-type="doi">10.1016/j.molp.2017.02.010</pub-id> <pub-id pub-id-type="pmid">28315474</pub-id></citation></ref>
<ref id="B10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chua</surname> <given-names>L. S.</given-names></name></person-group> (<year>2014</year>). <article-title>Review on liver inflammation and antiinflammatory activity of <italic>Andrographis paniculata</italic> for hepatoprotection.</article-title> <source><italic>Phytother. Res.</italic></source> <volume>28</volume> <fpage>1589</fpage>&#x2013;<lpage>1598</lpage>. <pub-id pub-id-type="doi">10.1002/ptr.5193</pub-id> <pub-id pub-id-type="pmid">25043965</pub-id></citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Consortium</surname> <given-names>T. G.</given-names></name></person-group> (<year>2012</year>). <article-title>The tomato genome sequence provides insights into fleshy fruit evolution.</article-title> <source><italic>Nature</italic></source> <volume>485</volume> <fpage>635</fpage>&#x2013;<lpage>641</lpage>. <pub-id pub-id-type="doi">10.1038/nature11119</pub-id> <pub-id pub-id-type="pmid">22660326</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Bie</surname> <given-names>T.</given-names></name> <name><surname>Cristianini</surname> <given-names>N.</given-names></name> <name><surname>Demuth</surname> <given-names>J. P.</given-names></name> <name><surname>Hahn</surname> <given-names>M. W.</given-names></name></person-group> (<year>2006</year>). <article-title>CAFE: a computational tool for the study of gene family evolution.</article-title> <source><italic>Bioinformatics</italic></source> <volume>22</volume> <fpage>1269</fpage>&#x2013;<lpage>1271</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btl097</pub-id> <pub-id pub-id-type="pmid">16543274</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Luca</surname> <given-names>V.</given-names></name> <name><surname>Salim</surname> <given-names>V.</given-names></name> <name><surname>Atsumi</surname> <given-names>S. M.</given-names></name> <name><surname>Yu</surname> <given-names>F.</given-names></name></person-group> (<year>2012</year>). <article-title>Mining the biodiversity of plants: a revolution in the making.</article-title> <source><italic>Science</italic></source> <volume>336</volume> <fpage>1658</fpage>&#x2013;<lpage>1661</lpage>. <pub-id pub-id-type="doi">10.1126/science.1217410</pub-id> <pub-id pub-id-type="pmid">22745417</pub-id></citation></ref>
<ref id="B14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dong</surname> <given-names>A. X.</given-names></name> <name><surname>Xin</surname> <given-names>H. B.</given-names></name> <name><surname>Li</surname> <given-names>Z. J.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Sun</surname> <given-names>Y. Q.</given-names></name> <name><surname>Nie</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>High-quality assembly of the reference genome for scarlet sage, <italic>Salvia splendens</italic>, an economically important ornamental plant.</article-title> <source><italic>Gigascience</italic></source> <volume>7</volume>:<issue>giy068</issue>. <pub-id pub-id-type="doi">10.1093/gigascience/giy068</pub-id> <pub-id pub-id-type="pmid">29931210</pub-id></citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Du</surname> <given-names>H.</given-names></name> <name><surname>Liang</surname> <given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>Assembly of chromosome-scale contigs by efficiently resolving repetitive sequences with long reads.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>10</volume>:<issue>5360</issue>. <pub-id pub-id-type="doi">10.1038/s41467-019-13355-3</pub-id> <pub-id pub-id-type="pmid">31767853</pub-id></citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dudchenko</surname> <given-names>O.</given-names></name> <name><surname>Batra</surname> <given-names>S. S.</given-names></name> <name><surname>Omer</surname> <given-names>A. D.</given-names></name> <name><surname>Nyquist</surname> <given-names>S. K.</given-names></name> <name><surname>Hoeger</surname> <given-names>M.</given-names></name> <name><surname>Durand</surname> <given-names>N. C.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>De novo assembly of the <italic>Aedes aegypti</italic> genome using Hi-C yields chromosome-length scaffolds.</article-title> <source><italic>Science</italic></source> <volume>356</volume> <fpage>92</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1126/science.aal3327</pub-id> <pub-id pub-id-type="pmid">28336562</pub-id></citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Durand</surname> <given-names>N. C.</given-names></name> <name><surname>Robinson</surname> <given-names>J. T.</given-names></name> <name><surname>Shamim</surname> <given-names>M. S.</given-names></name> <name><surname>Machol</surname> <given-names>I.</given-names></name> <name><surname>Mesirov</surname> <given-names>J. P.</given-names></name> <name><surname>Lander</surname> <given-names>E. S.</given-names></name><etal/></person-group> (<year>2016a</year>). <article-title>Juicebox provides a visualization system for Hi-C contact maps with unlimited zoom.</article-title> <source><italic>Cell Syst.</italic></source> <volume>3</volume> <fpage>99</fpage>&#x2013;<lpage>101</lpage>. <pub-id pub-id-type="doi">10.1016/j.cels.2015.07.012</pub-id> <pub-id pub-id-type="pmid">27467250</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Durand</surname> <given-names>N. C.</given-names></name> <name><surname>Shamim</surname> <given-names>M. S.</given-names></name> <name><surname>Machol</surname> <given-names>I.</given-names></name> <name><surname>Rao</surname> <given-names>S. S.</given-names></name> <name><surname>Huntley</surname> <given-names>M. H.</given-names></name> <name><surname>Lander</surname> <given-names>E. S.</given-names></name><etal/></person-group> (<year>2016b</year>). <article-title>Juicer provides a one-click system for analyzing loop-resolution Hi-C experiments.</article-title> <source><italic>Cell Syst.</italic></source> <volume>3</volume> <fpage>95</fpage>&#x2013;<lpage>98</lpage>. <pub-id pub-id-type="doi">10.1016/j.cels.2016.07.002</pub-id> <pub-id pub-id-type="pmid">27467249</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Edgar</surname> <given-names>R. C.</given-names></name></person-group> (<year>2004</year>). <article-title>MUSCLE: multiple sequence alignment with high accuracy and high throughput.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>32</volume> <fpage>1792</fpage>&#x2013;<lpage>1797</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkh340</pub-id> <pub-id pub-id-type="pmid">15034147</pub-id></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Edgar</surname> <given-names>R. C.</given-names></name> <name><surname>Myers</surname> <given-names>E. W.</given-names></name></person-group> (<year>2005</year>). <article-title>PILER: identification and classification of genomic repeats.</article-title> <source><italic>Bioinformatics</italic></source> <volume>21</volume> <issue>(Suppl. 1)</issue>, <fpage>i152</fpage>&#x2013;<lpage>i158</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bti1003</pub-id> <pub-id pub-id-type="pmid">15961452</pub-id></citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fitzsimons</surname> <given-names>D. W.</given-names></name></person-group> (<year>2013</year>). <article-title>World Health Organization.</article-title> <source><italic>Acta Med. Port.</italic></source> <volume>26</volume> <fpage>186</fpage>&#x2013;<lpage>187</lpage>.</citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garg</surname> <given-names>A.</given-names></name> <name><surname>Agrawal</surname> <given-names>L.</given-names></name> <name><surname>Misra</surname> <given-names>R. C.</given-names></name> <name><surname>Sharma</surname> <given-names>S.</given-names></name> <name><surname>Ghosh</surname> <given-names>S.</given-names></name></person-group> (<year>2015</year>). <article-title><italic>Andrographis paniculata</italic> transcriptome provides molecular insights into tissue-specific accumulation of medicinal diterpenes.</article-title> <source><italic>BMC Genomics</italic></source> <volume>16</volume>:<issue>659</issue>. <pub-id pub-id-type="doi">10.1186/s12864-015-1864-y</pub-id> <pub-id pub-id-type="pmid">26328761</pub-id></citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Girollet</surname> <given-names>N.</given-names></name> <name><surname>Rubio</surname> <given-names>B.</given-names></name> <name><surname>Bert</surname> <given-names>P. F.</given-names></name></person-group> (<year>2019</year>). <article-title>De novo phased assembly of the <italic>Vitis riparia</italic> grape genome.</article-title> <source><italic>Sci. Data</italic></source> <volume>6</volume>:<issue>127</issue>. <pub-id pub-id-type="doi">10.1038/s41597-019-0133-3</pub-id> <pub-id pub-id-type="pmid">31324816</pub-id></citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>L.</given-names></name> <name><surname>Winzer</surname> <given-names>T.</given-names></name> <name><surname>Yang</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Ning</surname> <given-names>Z.</given-names></name> <name><surname>He</surname> <given-names>Z.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>The opium poppy genome and morphinan production.</article-title> <source><italic>Science</italic></source> <volume>362</volume> <fpage>343</fpage>&#x2013;<lpage>347</lpage>. <pub-id pub-id-type="doi">10.1126/science.aat4096</pub-id> <pub-id pub-id-type="pmid">30166436</pub-id></citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haas</surname> <given-names>B. J.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name> <name><surname>Zhu</surname> <given-names>W.</given-names></name> <name><surname>Pertea</surname> <given-names>M.</given-names></name> <name><surname>Allen</surname> <given-names>J. E.</given-names></name> <name><surname>Orvis</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>Automated eukaryotic gene structure annotation using EVidenceModeler and the program to assemble spliced alignments.</article-title> <source><italic>Genome Biol.</italic></source> <volume>9</volume>:<issue>R7</issue>. <pub-id pub-id-type="doi">10.1186/gb-2008-9-1-r7</pub-id> <pub-id pub-id-type="pmid">18190707</pub-id></citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jana</surname> <given-names>T.</given-names></name> <name><surname>Lam-Tung</surname> <given-names>N.</given-names></name> <name><surname>Arndt</surname> <given-names>V. H.</given-names></name> <name><surname>Quang</surname> <given-names>M. B.</given-names></name></person-group> (<year>2016</year>). <article-title>W-IQ-TREE: a fast online phylogenetic tool for maximum likelihood analysis.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>44</volume> <fpage>W232</fpage>&#x2013;<lpage>W235</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkw256</pub-id> <pub-id pub-id-type="pmid">27084950</pub-id></citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Koren</surname> <given-names>S.</given-names></name> <name><surname>Walenz</surname> <given-names>B. P.</given-names></name> <name><surname>Berlin</surname> <given-names>K.</given-names></name> <name><surname>Miller</surname> <given-names>J. R.</given-names></name> <name><surname>Bergman</surname> <given-names>N. H.</given-names></name> <name><surname>Phillippy</surname> <given-names>A. M.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Canu: scalable and accurate long-read assembly via adaptive <italic>k</italic>-mer weighting and repeat separation.</article-title> <source><italic>Genome Res.</italic></source> <volume>27</volume> <fpage>722</fpage>&#x2013;<lpage>736</lpage>. <pub-id pub-id-type="doi">10.1101/gr.215087.116</pub-id> <pub-id pub-id-type="pmid">28298431</pub-id></citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Korf</surname> <given-names>I.</given-names></name></person-group> (<year>2004</year>). <article-title>Gene finding in novel genomes.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>5</volume>:<issue>59</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-5-59</pub-id> <pub-id pub-id-type="pmid">15144565</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kreplak</surname> <given-names>J.</given-names></name> <name><surname>Madoui</surname> <given-names>M.</given-names></name> <name><surname>C&#x00E1;pal</surname> <given-names>P.</given-names></name> <name><surname>Nov&#x00E1;k</surname> <given-names>P.</given-names></name> <name><surname>Burstin</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>A reference genome for pea provides insight into legume genome evolution.</article-title> <source><italic>Nat. Genet.</italic></source> <volume>51</volume> <fpage>1411</fpage>&#x2013;<lpage>1422</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-019-0480-1</pub-id> <pub-id pub-id-type="pmid">31477930</pub-id></citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lange</surname> <given-names>B. M.</given-names></name> <name><surname>Ketchum</surname> <given-names>R. E.</given-names></name> <name><surname>Croteau</surname> <given-names>R. B.</given-names></name></person-group> (<year>2001</year>). <article-title>Isoprenoid biosynthesis. Metabolite profiling of peppermint oil gland secretory cells and application to herbicide target analysis.</article-title> <source><italic>Plant Physiol.</italic></source> <volume>127</volume> <fpage>305</fpage>&#x2013;<lpage>314</lpage>. <pub-id pub-id-type="doi">10.1104/pp.127.1.305</pub-id> <pub-id pub-id-type="pmid">11553758</pub-id></citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Durbin</surname> <given-names>R.</given-names></name></person-group> (<year>2010</year>). <article-title>Fast and accurate long-read alignment with Burrows-Wheeler transform.</article-title> <source><italic>Bioinformatics</italic></source> <volume>26</volume> <fpage>589</fpage>&#x2013;<lpage>595</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp698</pub-id> <pub-id pub-id-type="pmid">20080505</pub-id></citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Handsaker</surname> <given-names>B.</given-names></name> <name><surname>Wysoker</surname> <given-names>A.</given-names></name> <name><surname>Fennell</surname> <given-names>T.</given-names></name> <name><surname>Ruan</surname> <given-names>J.</given-names></name> <name><surname>Homer</surname> <given-names>N.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools.</article-title> <source><italic>Bioinformatics</italic></source> <volume>25</volume> <fpage>2078</fpage>&#x2013;<lpage>2079</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id> <pub-id pub-id-type="pmid">19505943</pub-id></citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Stoeckert</surname> <given-names>C. J.</given-names></name> <name><surname>Roos</surname> <given-names>D. S.</given-names></name></person-group> (<year>2003</year>). <article-title>OrthoMCL: identification of ortholog groups for eukaryotic genomes.</article-title> <source><italic>Genome Res.</italic></source> <volume>13</volume> <fpage>2178</fpage>&#x2013;<lpage>2189</lpage>. <pub-id pub-id-type="doi">10.1101/gr.1224503</pub-id> <pub-id pub-id-type="pmid">12952885</pub-id></citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lim</surname> <given-names>J. C.</given-names></name> <name><surname>Chan</surname> <given-names>T. K.</given-names></name> <name><surname>Ng</surname> <given-names>D. S.</given-names></name> <name><surname>Sagineedu</surname> <given-names>S. R.</given-names></name> <name><surname>Stanslas</surname> <given-names>J.</given-names></name> <name><surname>Wong</surname> <given-names>W. S. F.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>Andrographolide and its analogues: versatile bioactive molecules for combating inflammation and cancer.</article-title> <source><italic>Clin. Exp. Pharmacol. Physiol.</italic></source> <volume>39</volume> <fpage>300</fpage>&#x2013;<lpage>310</lpage>. <pub-id pub-id-type="doi">10.1111/j.1440-1681.2011.05633.x</pub-id> <pub-id pub-id-type="pmid">22017767</pub-id></citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Shen</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Papasian</surname> <given-names>C. J.</given-names></name> <name><surname>Deng</surname> <given-names>H. W.</given-names></name><etal/></person-group> (<year>2011</year>). <article-title>Comparative studies of de novo assembly tools for next-generation sequencing technologies.</article-title> <source><italic>Bioinformatics</italic></source> <volume>27</volume> <fpage>2031</fpage>&#x2013;<lpage>2037</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr319</pub-id> <pub-id pub-id-type="pmid">21636596</pub-id></citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>X.</given-names></name> <name><surname>Luo</surname> <given-names>W.</given-names></name> <name><surname>Lin</surname> <given-names>C.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name></person-group> (<year>2014</year>). <article-title>Andrographolide inhibits proliferation of human lung cancer cells and the related mechanisms.</article-title> <source><italic>Int. J. Clin. Exp. Med.</italic></source> <volume>7</volume> <fpage>4220</fpage>&#x2013;<lpage>4225</lpage>.</citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marcais</surname> <given-names>G.</given-names></name> <name><surname>Kingsford</surname> <given-names>C.</given-names></name></person-group> (<year>2011</year>). <article-title>A fast, lock-free approach for efficient parallel counting of occurrences of <italic>k</italic>-mers.</article-title> <source><italic>Bioinformatics</italic></source> <volume>27</volume> <fpage>764</fpage>&#x2013;<lpage>770</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr011</pub-id> <pub-id pub-id-type="pmid">21217122</pub-id></citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Michael</surname> <given-names>T. P.</given-names></name> <name><surname>Jupe</surname> <given-names>F.</given-names></name> <name><surname>Bemm</surname> <given-names>F.</given-names></name> <name><surname>Motley</surname> <given-names>S. T.</given-names></name> <name><surname>Sandoval</surname> <given-names>J. P.</given-names></name> <name><surname>Lanz</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>High contiguity <italic>Arabidopsis thaliana</italic> genome assembly with a single nanopore flow cell.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>9</volume>:<issue>541</issue>. <pub-id pub-id-type="doi">10.1038/s41467-018-03016-2</pub-id> <pub-id pub-id-type="pmid">29416032</pub-id></citation></ref>
<ref id="B39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mizutani</surname> <given-names>M.</given-names></name> <name><surname>Ohta</surname> <given-names>D.</given-names></name></person-group> (<year>1998</year>). <article-title>Two isoforms of NADPH:cytochrome P450 reductase in <italic>Arabidopsis thaliana</italic>. Gene structure, heterologous expression in insect cells, and differential regulation.</article-title> <source><italic>Plant Physiol.</italic></source> <volume>116</volume> <fpage>357</fpage>&#x2013;<lpage>367</lpage>. <pub-id pub-id-type="doi">10.1104/pp.116.1.357</pub-id> <pub-id pub-id-type="pmid">9449848</pub-id></citation></ref>
<ref id="B40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Morant</surname> <given-names>M.</given-names></name> <name><surname>Bak</surname> <given-names>S.</given-names></name> <name><surname>Moller</surname> <given-names>B. L.</given-names></name> <name><surname>Werck-Reichhart</surname> <given-names>D.</given-names></name></person-group> (<year>2003</year>). <article-title>Plant cytochromes P450: tools for pharmacology, plant protection and phytoremediation.</article-title> <source><italic>Curr. Opin. Biotechnol.</italic></source> <volume>14</volume> <fpage>151</fpage>&#x2013;<lpage>162</lpage>. <pub-id pub-id-type="doi">10.1016/s0958-1669(03)00024-7</pub-id></citation></ref>
<ref id="B41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nakamura</surname> <given-names>T.</given-names></name> <name><surname>Yamada</surname> <given-names>K. D.</given-names></name> <name><surname>Tomii</surname> <given-names>K.</given-names></name> <name><surname>Katoh</surname> <given-names>K.</given-names></name> <name><surname>Hancock</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Parallelization of MAFFT for large-scale multiple sequence alignments.</article-title> <source><italic>Bioinformatics</italic></source> <volume>34</volume> <fpage>2490</fpage>&#x2013;<lpage>2492</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty121</pub-id> <pub-id pub-id-type="pmid">29506019</pub-id></citation></ref>
<ref id="B42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nelson</surname> <given-names>D. R.</given-names></name></person-group> (<year>2009</year>). <article-title>The cytochrome P450 homepage.</article-title> <source><italic>Hum. Genomics</italic></source> <volume>4</volume> <fpage>59</fpage>&#x2013;<lpage>65</lpage>.</citation></ref>
<ref id="B43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nutzmann</surname> <given-names>H. W.</given-names></name> <name><surname>Osbourn</surname> <given-names>A.</given-names></name></person-group> (<year>2014</year>). <article-title>Gene clustering in plant specialized metabolism.</article-title> <source><italic>Curr. Opin. Biotechnol.</italic></source> <volume>26</volume> <fpage>91</fpage>&#x2013;<lpage>99</lpage>. <pub-id pub-id-type="doi">10.1016/j.copbio.2013.10.009</pub-id> <pub-id pub-id-type="pmid">24679264</pub-id></citation></ref>
<ref id="B44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ou</surname> <given-names>S.</given-names></name> <name><surname>Su</surname> <given-names>W.</given-names></name> <name><surname>Liao</surname> <given-names>Y.</given-names></name> <name><surname>Chougule</surname> <given-names>K.</given-names></name> <name><surname>Agda</surname> <given-names>J.</given-names></name> <name><surname>Hellinga</surname> <given-names>A. J.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Benchmarking transposable element annotation methods for creation of a streamlined, comprehensive pipeline.</article-title> <source><italic>Genome Biol.</italic></source> <volume>20</volume>:<issue>275</issue>. <pub-id pub-id-type="doi">10.1186/s13059-019-1905-y</pub-id> <pub-id pub-id-type="pmid">31843001</pub-id></citation></ref>
<ref id="B45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Price</surname> <given-names>A. L.</given-names></name> <name><surname>Jones</surname> <given-names>N. C.</given-names></name> <name><surname>Pevzner</surname> <given-names>P. A.</given-names></name></person-group> (<year>2005</year>). <article-title>De novo identification of repeat families in large genomes.</article-title> <source><italic>Bioinformatics</italic></source> <volume>21</volume> <issue>(Suppl. 1)</issue>, <fpage>i351</fpage>&#x2013;<lpage>i358</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bti1018</pub-id> <pub-id pub-id-type="pmid">15961478</pub-id></citation></ref>
<ref id="B46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Reyes-Chin-Wo</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Yang</surname> <given-names>X.</given-names></name> <name><surname>Kozik</surname> <given-names>A.</given-names></name> <name><surname>Arikit</surname> <given-names>S.</given-names></name> <name><surname>Song</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Genome assembly with <italic>in vitro</italic> proximity ligation data and whole-genome triplication in lettuce.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>8</volume>:<issue>14953</issue>. <pub-id pub-id-type="doi">10.1038/ncomms14953</pub-id> <pub-id pub-id-type="pmid">28401891</pub-id></citation></ref>
<ref id="B47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schnable</surname> <given-names>P. S.</given-names></name> <name><surname>Ware</surname> <given-names>D.</given-names></name> <name><surname>Fulton</surname> <given-names>R. S.</given-names></name> <name><surname>Stein</surname> <given-names>J. C.</given-names></name> <name><surname>Wei</surname> <given-names>F.</given-names></name> <name><surname>Pasternak</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>The B73 maize genome: complexity, diversity, and dynamics.</article-title> <source><italic>Science</italic></source> <volume>326</volume> <fpage>1112</fpage>&#x2013;<lpage>1115</lpage>. <pub-id pub-id-type="doi">10.1126/science.1178534</pub-id> <pub-id pub-id-type="pmid">19965430</pub-id></citation></ref>
<ref id="B48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schuler</surname> <given-names>M. A.</given-names></name></person-group> (<year>1996</year>). <article-title>The role of cytochrome P450 monooxygenases in plant-insect interactions.</article-title> <source><italic>Plant Physiol.</italic></source> <volume>112</volume> <fpage>1411</fpage>&#x2013;<lpage>1419</lpage>. <pub-id pub-id-type="doi">10.1104/pp.112.4.1411</pub-id> <pub-id pub-id-type="pmid">8972591</pub-id></citation></ref>
<ref id="B49"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shen</surname> <given-names>Q.</given-names></name> <name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Jiang</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Q.</given-names></name></person-group> (<year>2016a</year>). <article-title>Functional characterization of ent-copalyl diphosphate synthase from <italic>Andrographis paniculata</italic> with putative involvement in andrographolides biosynthesis.</article-title> <source><italic>Biotechnol. Lett.</italic></source> <volume>38</volume> <fpage>131</fpage>&#x2013;<lpage>137</lpage>. <pub-id pub-id-type="doi">10.1007/s10529-015-1961-7</pub-id> <pub-id pub-id-type="pmid">26373739</pub-id></citation></ref>
<ref id="B50"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shen</surname> <given-names>Q.</given-names></name> <name><surname>Liu</surname> <given-names>Q.</given-names></name> <name><surname>Li</surname> <given-names>C.</given-names></name> <name><surname>Fu</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Q.</given-names></name></person-group> (<year>2016b</year>). <article-title>Functional characterization of ApCPS involved in andrographolides biosynthesis by virus-induced gene silencing.</article-title> <source><italic>Acta Bot. Boreali</italic></source> <volume>36</volume> <fpage>17</fpage>&#x2013;<lpage>22</lpage>.</citation></ref>
<ref id="B51"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shen</surname> <given-names>Q.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Liao</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Yan</surname> <given-names>T.</given-names></name> <name><surname>Shi</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>The genome of <italic>Artemisia annua</italic> provides insight into the evolution of Asteraceae family and artemisinin biosynthesis.</article-title> <source><italic>Mol. Plant</italic></source> <volume>11</volume> <fpage>776</fpage>&#x2013;<lpage>788</lpage>. <pub-id pub-id-type="doi">10.1016/j.molp.2018.03.015</pub-id> <pub-id pub-id-type="pmid">29703587</pub-id></citation></ref>
<ref id="B52"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Simao</surname> <given-names>F. A.</given-names></name> <name><surname>Waterhouse</surname> <given-names>R. M.</given-names></name> <name><surname>Ioannidis</surname> <given-names>P.</given-names></name> <name><surname>Kriventseva</surname> <given-names>E. V.</given-names></name> <name><surname>Zdobnov</surname> <given-names>E. M.</given-names></name></person-group> (<year>2015</year>). <article-title>BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs.</article-title> <source><italic>Bioinformatics</italic></source> <volume>31</volume> <fpage>3210</fpage>&#x2013;<lpage>3212</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv351</pub-id> <pub-id pub-id-type="pmid">26059717</pub-id></citation></ref>
<ref id="B53"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Srivastava</surname> <given-names>N.</given-names></name> <name><surname>Akhila</surname> <given-names>A.</given-names></name></person-group> (<year>2010</year>). <article-title>Biosynthesis of andrographolide in <italic>Andrographis paniculata</italic>.</article-title> <source><italic>Phytochemistry</italic></source> <volume>71</volume> <fpage>1298</fpage>&#x2013;<lpage>1304</lpage>. <pub-id pub-id-type="doi">10.1016/j.phytochem.2010.05.022</pub-id> <pub-id pub-id-type="pmid">20557910</pub-id></citation></ref>
<ref id="B54"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stanke</surname> <given-names>M.</given-names></name> <name><surname>Keller</surname> <given-names>O.</given-names></name> <name><surname>Gunduz</surname> <given-names>I.</given-names></name> <name><surname>Hayes</surname> <given-names>A.</given-names></name> <name><surname>Waack</surname> <given-names>S.</given-names></name> <name><surname>Morgenstern</surname> <given-names>B.</given-names></name><etal/></person-group> (<year>2006</year>). <article-title>AUGUSTUS: <italic>Ab initio</italic> prediction of alternative transcripts.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>34</volume> <fpage>W435</fpage>&#x2013;<lpage>W439</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkl200</pub-id> <pub-id pub-id-type="pmid">16845043</pub-id></citation></ref>
<ref id="B55"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>W.</given-names></name> <name><surname>Leng</surname> <given-names>L.</given-names></name> <name><surname>Yin</surname> <given-names>Q.</given-names></name> <name><surname>Xu</surname> <given-names>M.</given-names></name> <name><surname>Huang</surname> <given-names>M.</given-names></name> <name><surname>Xu</surname> <given-names>Z.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>The genome of the medicinal plant <italic>Andrographis paniculata</italic> provides insight into the biosynthesis of the bioactive diterpenoid neoandrographolide.</article-title> <source><italic>Plant J.</italic></source> <volume>97</volume> <fpage>841</fpage>&#x2013;<lpage>857</lpage>. <pub-id pub-id-type="doi">10.1111/tpj.14162</pub-id> <pub-id pub-id-type="pmid">30444296</pub-id></citation></ref>
<ref id="B56"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Trapnell</surname> <given-names>C.</given-names></name> <name><surname>Hendrickson</surname> <given-names>D. G.</given-names></name> <name><surname>Sauvageau</surname> <given-names>M.</given-names></name> <name><surname>Goff</surname> <given-names>L.</given-names></name> <name><surname>Rinn</surname> <given-names>J. L.</given-names></name> <name><surname>Pachter</surname> <given-names>L.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>Differential analysis of gene regulation at transcript resolution with RNA-seq.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>31</volume> <fpage>46</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.2450</pub-id> <pub-id pub-id-type="pmid">23222703</pub-id></citation></ref>
<ref id="B57"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Trapnell</surname> <given-names>C.</given-names></name> <name><surname>Pachter</surname> <given-names>L.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2009</year>). <article-title>TopHat: discovering splice junctions with RNA-Seq.</article-title> <source><italic>Bioinformatics</italic></source> <volume>25</volume> <fpage>1105</fpage>&#x2013;<lpage>1111</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp120</pub-id> <pub-id pub-id-type="pmid">19289445</pub-id></citation></ref>
<ref id="B58"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vining</surname> <given-names>K. J.</given-names></name> <name><surname>Johnson</surname> <given-names>S. R.</given-names></name> <name><surname>Ahkami</surname> <given-names>A.</given-names></name> <name><surname>Lange</surname> <given-names>I.</given-names></name> <name><surname>Parrish</surname> <given-names>A. N.</given-names></name> <name><surname>Trapp</surname> <given-names>S. C.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Draft genome sequence of <italic>Mentha longifolia</italic> and development of resources for mint cultivar improvement.</article-title> <source><italic>Mol. Plant</italic></source> <volume>10</volume> <fpage>323</fpage>&#x2013;<lpage>339</lpage>. <pub-id pub-id-type="doi">10.1016/j.molp.2016.10.018</pub-id> <pub-id pub-id-type="pmid">27867107</pub-id></citation></ref>
<ref id="B59"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vurture</surname> <given-names>G. W.</given-names></name> <name><surname>Sedlazeck</surname> <given-names>F. J.</given-names></name> <name><surname>Nattestad</surname> <given-names>M.</given-names></name> <name><surname>Underwood</surname> <given-names>C. J.</given-names></name> <name><surname>Fang</surname> <given-names>H.</given-names></name> <name><surname>Gurtowski</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>GenomeScope: fast reference-free genome profiling from short reads.</article-title> <source><italic>Bioinformatics</italic></source> <volume>33</volume> <fpage>2202</fpage>&#x2013;<lpage>2204</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx153</pub-id> <pub-id pub-id-type="pmid">28369201</pub-id></citation></ref>
<ref id="B60"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Walker</surname> <given-names>B. J.</given-names></name> <name><surname>Abeel</surname> <given-names>T.</given-names></name> <name><surname>Shea</surname> <given-names>T.</given-names></name> <name><surname>Priest</surname> <given-names>M.</given-names></name> <name><surname>Abouelliel</surname> <given-names>A.</given-names></name> <name><surname>Sakthikumar</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Pilon: an integrated tool for comprehensive microbial variant detection and genome assembly improvement.</article-title> <source><italic>PLoS One</italic></source> <volume>9</volume>:<issue>e112963</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0112963</pub-id> <pub-id pub-id-type="pmid">25409509</pub-id></citation></ref>
<ref id="B61"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Lin</surname> <given-names>H. X.</given-names></name> <name><surname>Su</surname> <given-names>P.</given-names></name> <name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>Guo</surname> <given-names>J.</given-names></name> <name><surname>Gao</surname> <given-names>W.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Molecular cloning and functional characterization of multiple geranylgeranyl pyrophosphate synthases (ApGGPPS) from <italic>Andrographis paniculata</italic>.</article-title> <source><italic>Plant Cell Rep.</italic></source> <volume>38</volume> <fpage>117</fpage>&#x2013;<lpage>128</lpage>. <pub-id pub-id-type="doi">10.1007/s00299-018-2353-y</pub-id> <pub-id pub-id-type="pmid">30448883</pub-id></citation></ref>
<ref id="B62"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Yu</surname> <given-names>S.</given-names></name> <name><surname>Tong</surname> <given-names>C.</given-names></name> <name><surname>Zhao</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Song</surname> <given-names>C.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Genome sequencing of the high oil crop sesame provides insight into oil biosynthesis.</article-title> <source><italic>Genome Biol.</italic></source> <volume>15</volume>:<issue>R39</issue>. <pub-id pub-id-type="doi">10.1186/gb-2014-15-2-r39</pub-id> <pub-id pub-id-type="pmid">24576357</pub-id></citation></ref>
<ref id="B63"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Tang</surname> <given-names>H.</given-names></name> <name><surname>Debarry</surname> <given-names>J. D.</given-names></name> <name><surname>Tan</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title><italic>MCScanX</italic>: a toolkit for detection and evolutionary analysis of gene synteny and collinearity.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>40</volume>:<issue>e49</issue>. <pub-id pub-id-type="doi">10.1093/nar/gkr1293</pub-id> <pub-id pub-id-type="pmid">22217600</pub-id></citation></ref>
<ref id="B64"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wicker</surname> <given-names>T.</given-names></name> <name><surname>Keller</surname> <given-names>B.</given-names></name></person-group> (<year>2007</year>). <article-title>Genome-wide comparative analysis of Copia retrotransposons in Triticeae, rice, and <italic>Arabidopsis</italic> reveals conserved ancient evolutionary lineages and distinct dynamics of individual <italic>copia</italic> families.</article-title> <source><italic>Genome Res.</italic></source> <volume>17</volume> <fpage>1072</fpage>&#x2013;<lpage>1081</lpage>. <pub-id pub-id-type="doi">10.1101/gr.6214107</pub-id> <pub-id pub-id-type="pmid">17556529</pub-id></citation></ref>
<ref id="B65"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>J.</given-names></name> <name><surname>Chu</surname> <given-names>Y.</given-names></name> <name><surname>Liao</surname> <given-names>B.</given-names></name> <name><surname>Xiao</surname> <given-names>S.</given-names></name> <name><surname>Yin</surname> <given-names>Q.</given-names></name> <name><surname>Bai</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title><italic>Panax ginseng</italic> genome examination for ginsenoside biosynthesis.</article-title> <source><italic>Gigascience</italic></source> <volume>6</volume> <fpage>1</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1093/gigascience/gix093</pub-id> <pub-id pub-id-type="pmid">29048480</pub-id></citation></ref>
<ref id="B66"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name></person-group> (<year>2007</year>). <article-title>LTR_FINDER: an efficient tool for the prediction of full-length LTR retrotransposons.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>35</volume> <fpage>W265</fpage>&#x2013;<lpage>W268</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkm286</pub-id> <pub-id pub-id-type="pmid">17485477</pub-id></citation></ref>
<ref id="B67"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yan</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Tian</surname> <given-names>Y.</given-names></name> <name><surname>Lian</surname> <given-names>J.</given-names></name> <name><surname>Yang</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>The genome of <italic>Dendrobium officinale</italic> illuminates the biology of the important traditional Chinese orchid herb.</article-title> <source><italic>Mol. Plant</italic></source> <volume>8</volume> <fpage>922</fpage>&#x2013;<lpage>934</lpage>. <pub-id pub-id-type="doi">10.1016/j.molp.2014.12.011</pub-id> <pub-id pub-id-type="pmid">25825286</pub-id></citation></ref>
<ref id="B68"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>G.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Hybrid de novo genome assembly of the Chinese herbal fleabane <italic>Erigeron breviscapus</italic>.</article-title> <source><italic>Gigascience</italic></source> <volume>6</volume> <fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1093/gigascience/gix028</pub-id> <pub-id pub-id-type="pmid">28431028</pub-id></citation></ref>
<ref id="B69"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z.</given-names></name></person-group> (<year>2007</year>). <article-title>PAML 4: phylogenetic analysis by maximum likelihood.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>24</volume> <fpage>1586</fpage>&#x2013;<lpage>1591</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msm088</pub-id> <pub-id pub-id-type="pmid">17483113</pub-id></citation></ref>
<ref id="B70"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yazaki</surname> <given-names>K.</given-names></name> <name><surname>Arimura</surname> <given-names>G. I.</given-names></name> <name><surname>Ohnishi</surname> <given-names>T.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x2018;Hidden&#x2019; terpenoids in plants: their biosynthesis, localization and ecological roles.</article-title> <source><italic>Plant Cell Physiol.</italic></source> <volume>58</volume> <fpage>1615</fpage>&#x2013;<lpage>1621</lpage>. <pub-id pub-id-type="doi">10.1093/pcp/pcx123</pub-id> <pub-id pub-id-type="pmid">29016891</pub-id></citation></ref>
<ref id="B71"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zerikly</surname> <given-names>M.</given-names></name> <name><surname>Challis</surname> <given-names>G. L.</given-names></name></person-group> (<year>2009</year>). <article-title>Strategies for the discovery of new natural products by genome mining.</article-title> <source><italic>Chembiochem</italic></source> <volume>10</volume> <fpage>625</fpage>&#x2013;<lpage>633</lpage>. <pub-id pub-id-type="doi">10.1002/cbic.200800389</pub-id> <pub-id pub-id-type="pmid">19165837</pub-id></citation></ref>
<ref id="B72"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>G.</given-names></name> <name><surname>Tian</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Shu</surname> <given-names>L.</given-names></name> <name><surname>Yang</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Hybrid de novo genome assembly of the Chinese herbal plant danshen (<italic>Salvia miltiorrhiza</italic> Bunge).</article-title> <source><italic>Gigascience</italic></source> <volume>4</volume>:<issue>62</issue>. <pub-id pub-id-type="doi">10.1186/s13742-015-0104-3</pub-id> <pub-id pub-id-type="pmid">26673920</pub-id></citation></ref>
<ref id="B73"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Tian</surname> <given-names>Y.</given-names></name> <name><surname>Yan</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>G.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Zeng</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Genome of plant maca (<italic>Lepidium meyenii</italic>) illuminates genomic basis for high-altitude adaptation in the central Andes.</article-title> <source><italic>Mol. Plant</italic></source> <volume>9</volume> <fpage>1066</fpage>&#x2013;<lpage>1077</lpage>. <pub-id pub-id-type="doi">10.1016/j.molp.2016.04.016</pub-id> <pub-id pub-id-type="pmid">27174404</pub-id></citation></ref>
<ref id="B74"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>R.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Ou</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>G.</given-names></name></person-group> (<year>2019</year>). <article-title>TEsorter: lineage-level classification of transposable elements using conserved protein domains.</article-title> <source><italic>bioRxiv [Preprint]</italic></source> <pub-id pub-id-type="doi">10.1101/800177</pub-id></citation></ref>
<ref id="B75"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>Q.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Cui</surname> <given-names>M. Y.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Fang</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>The reference genome sequence of <italic>Scutellaria baicalensis</italic> provides insights into the evolution of wogonin biosynthesis.</article-title> <source><italic>Mol. Plant</italic></source> <volume>12</volume> <fpage>935</fpage>&#x2013;<lpage>950</lpage>. <pub-id pub-id-type="doi">10.1016/j.molp.2019.04.002</pub-id> <pub-id pub-id-type="pmid">30999079</pub-id></citation></ref>
<ref id="B76"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zi</surname> <given-names>J.</given-names></name> <name><surname>Matsuba</surname> <given-names>Y.</given-names></name> <name><surname>Hong</surname> <given-names>Y. J.</given-names></name> <name><surname>Jackson</surname> <given-names>A. J.</given-names></name> <name><surname>Tantillo</surname> <given-names>D. J.</given-names></name> <name><surname>Pichersky</surname> <given-names>E.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Biosynthesis of lycosantalonol, a cis-prenyl derived diterpenoid.</article-title> <source><italic>J. Am. Chem. Soc.</italic></source> <volume>136</volume> <fpage>16951</fpage>&#x2013;<lpage>16953</lpage>. <pub-id pub-id-type="doi">10.1021/ja508477e</pub-id> <pub-id pub-id-type="pmid">25406026</pub-id></citation></ref>
</ref-list><glossary>
<title>Abbreviations</title>
<def-list id="DL1">
<def-item><term>BUSCO</term><def><p>Benchmarking Universal Single-Copy Orthologs</p></def></def-item>
<def-item><term>CPS</term><def><p>copalyl diphosphate synthase</p></def></def-item>
<def-item><term>CYP</term><def><p>cytochrome P450</p></def></def-item>
<def-item><term>DMAPP</term><def><p>dimethylallyl diphosphate</p></def></def-item>
<def-item><term>GGPPS</term><def><p>geranylgeranyl pyrophosphate synthase</p></def></def-item>
<def-item><term>GO</term><def><p>gene ontology</p></def></def-item>
<def-item><term>IPP</term><def><p>isopentenyl diphosphate</p></def></def-item>
<def-item><term>KSL</term><def><p>kaurene synthase-like protein</p></def></def-item>
<def-item><term>TE</term><def><p>transposable element</p></def></def-item>
</def-list>
</glossary>
<fn-group>
<fn id="footnote1">
<label>1</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.girinst.org/repbase/index.html">http://www.girinst.org/repbase/index.html</ext-link></p></fn>
<fn id="footnote2">
<label>2</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.repeatmasker.org/">http://www.repeatmasker.org/</ext-link></p></fn>
<fn id="footnote3">
<label>3</label>
<p><ext-link ext-link-type="uri" xlink:href="https://github.com/xzhub/LTR_Finder">https://github.com/xzhub/LTR_Finder</ext-link></p></fn>
<fn id="footnote4">
<label>4</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.drive5.com/piler/">http://www.drive5.com/piler/</ext-link></p></fn>
<fn id="footnote5">
<label>5</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.repeatmasker.org/">http://www.repeatmasker.org/</ext-link></p></fn>
<fn id="footnote6">
<label>6</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.repeatmasker.org/RepeatModeler.html">http://www.repeatmasker.org/RepeatModeler.html</ext-link></p></fn>
<fn id="footnote7">
<label>7</label>
<p><ext-link ext-link-type="uri" xlink:href="http://repeatexplorer.org/">http://repeatexplorer.org/</ext-link></p></fn>
<fn id="footnote8">
<label>8</label>
<p><ext-link ext-link-type="uri" xlink:href="https://itol.embl.de/">https://itol.embl.de/</ext-link></p></fn>
<fn id="footnote9">
<label>9</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.uniprot.org/">http://www.uniprot.org/</ext-link></p></fn>
<fn id="footnote10">
<label>10</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.genome.jp/kegg/">http://www.genome.jp/kegg/</ext-link></p></fn>
<fn id="footnote11">
<label>11</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/interpro/">https://www.ebi.ac.uk/interpro/</ext-link></p></fn>
<fn id="footnote12">
<label>12</label>
<p><ext-link ext-link-type="uri" xlink:href="http://bioinfogp.cnb.csic.es/tools/venny/index.html">http://bioinfogp.cnb.csic.es/tools/venny/index.html</ext-link></p></fn>
<fn id="footnote13">
<label>13</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.drive5.com/muscle">http://www.drive5.com/muscle</ext-link></p></fn>
<fn id="footnote14">
<label>14</label>
<p><ext-link ext-link-type="uri" xlink:href="http://drnelson.uthsc.edu/CytochromeP450.html">http://drnelson.uthsc.edu/CytochromeP450.html</ext-link></p></fn>
</fn-group>
</back>
</article>
