<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="data-paper">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Ecol. Evol.</journal-id>
<journal-title>Frontiers in Ecology and Evolution</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Ecol. Evol.</abbrev-journal-title>
<issn pub-type="epub">2296-701X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fevo.2020.550936</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Ecology and Evolution</subject>
<subj-group>
<subject>Data Report</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title><italic>De novo</italic> Genome Assembly, Annotation, and SNP Identification of an Endangered Rockcress, <italic>Boechera fecunda</italic></article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Hengyou</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/970562/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mitchell-Olds</surname> <given-names>Thomas</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1085493/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mujacic</surname> <given-names>Ibro</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Song</surname> <given-names>Bao-Hua</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/356962/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Biological Sciences, University of North Carolina at Charlotte</institution>, <addr-line>Charlotte, NC</addr-line>, <country>United States</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Biology, Center for Genomic and Computational Biology, Duke University</institution>, <addr-line>Durham, NC</addr-line>, <country>United States</country></aff>
<aff id="aff3"><sup>3</sup><institution>Division of Genomic and Molecular Pathology, University of Chicago</institution>, <addr-line>Chicago, IL</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Dapeng Wang, University of Leeds, United Kingdom</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Diego San Mauro, Complutense University of Madrid, Spain; Jesus Lozano-Fernandez, Instituto de Biolog&#x000ED;a Evolutiva (IBE), Spain</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Bao-Hua Song <email>bsong5&#x00040;uncc.edu</email>; <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0003-3537-7783">orcid.org/0000-0003-3537-7783</ext-link></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Phylogenetics, Phylogenomics, and Systematics, a section of the journal Frontiers in Ecology and Evolution</p></fn>
<fn fn-type="present-address" id="fn002"><p>&#x02020;Present address: Hengyou Zhang, Donald Danforth Plant Science Center, Saint Louis, MO, United States</p></fn></author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>10</month>
<year>2020</year>
</pub-date>
<pub-date pub-type="collection">
<year>2020</year>
</pub-date>
<volume>8</volume>
<elocation-id>550936</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>05</month>
<year>2020</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>09</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2020 Zhang, Mitchell-Olds, Mujacic and Song.</copyright-statement>
<copyright-year>2020</copyright-year>
<copyright-holder>Zhang, Mitchell-Olds, Mujacic and Song</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<kwd-group>
<kwd>conservation</kwd>
<kwd>ecological adaptation</kwd>
<kwd>evolution</kwd>
<kwd>genome sequence</kwd>
<kwd>polymorphism</kwd>
<kwd>SNP</kwd>
</kwd-group>
<contract-num rid="cn002">2019-BIG-6507</contract-num>
<contract-sponsor id="cn001">National Institutes of Health<named-content content-type="fundref-id">10.13039/100000002</named-content></contract-sponsor>
<contract-sponsor id="cn002">North Carolina Biotechnology Center<named-content content-type="fundref-id">10.13039/100005562</named-content></contract-sponsor>
<contract-sponsor id="cn003">University of North Carolina at Charlotte<named-content content-type="fundref-id">10.13039/100010942</named-content></contract-sponsor>
<counts>
<fig-count count="1"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="33"/>
<page-count count="6"/>
<word-count count="4539"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Climate change and many adverse consequences caused by human disturbance have led to the potential and continuing extinction of many plant species (Tilman and Lehman, <xref ref-type="bibr" rid="B29">2001</xref>). These adverse effects are magnified when they are imposed on rare or endangered plants, especially those with small population size and restricted gene flow (Ellstrand and Elam, <xref ref-type="bibr" rid="B4">1993</xref>; Ouborg and Vriezen, <xref ref-type="bibr" rid="B23">2007</xref>). In this regard, there has been increasing emphasis on dissecting the adaptive diversity and prioritizing the conservation of rare, threatened, and endangered plants (Zhang et al., <xref ref-type="bibr" rid="B33">2017</xref>). Some small plant populations have shown adaptation to local or distinct environmental conditions (McKay et al., <xref ref-type="bibr" rid="B20">2001</xref>; Song and Mitchell-Olds, <xref ref-type="bibr" rid="B27">2007</xref>), which has been reflected by strong correlations between phenotypic and/or physiological traits and relevant environmental variables (McKay et al., <xref ref-type="bibr" rid="B20">2001</xref>; Blanquart et al., <xref ref-type="bibr" rid="B1">2013</xref>), and adaptive genetic variation has demonstrated a role in contributing to the evolution of plant species (Gehan et al., <xref ref-type="bibr" rid="B7">2015</xref>; Yeaman, <xref ref-type="bibr" rid="B32">2015</xref>). Despite the importance of this issue, the molecular mechanisms by which genetic variation causes local adaptation remain largely unknown. Thus, assembly and annotation of the genome sequences of these rare plant species represent a critical first step to understand plant adaptation mechanisms.</p>
<p><italic>Boechera fecunda</italic> is rare and predominantly inbreeding. It is restricted to areas of calc-silicate soil outcrops in western Montana, United States (<xref ref-type="fig" rid="F1">Figure 1A</xref>) (Song and Mitchell-Olds, <xref ref-type="bibr" rid="B27">2007</xref>; Leamy et al., <xref ref-type="bibr" rid="B14">2014</xref>). A total of 21 <italic>B. fecunda</italic> populations were found to be endemic to two geographic regions (WEST and EAST) in southwestern Montana, separated by a distance of &#x0007E;100 km with different climatic variables and soil water availability (<xref ref-type="fig" rid="F1">Figure 1A</xref>). Briefly, the mean elevation of the WEST region (1525 m) is much lower than that of the EAST region (2195 m), while the WEST region (46&#x000B0; 21&#x02032;) is located at a higher latitude than the EAST region (45&#x000B0; 42&#x02032;). McKay et al. (<xref ref-type="bibr" rid="B20">2001</xref>) found that <italic>B. fecunda</italic> populations growing in the low-elevation region (WEST) experienced higher drought stress than those in high-elevation habitats (EAST), and plants growing in these two highly divergent regions, respectively, are adapted to their local environments (Song and Mitchell-Olds, <xref ref-type="bibr" rid="B27">2007</xref>). This local adaptation was evidenced by the observations of higher mean water use efficiency (WUE), larger leaf area, and greater rosette diameter for populations from the drier, low-elevation habitat compared with those from the EAST region (McKay et al., <xref ref-type="bibr" rid="B20">2001</xref>). Bottleneck analysis suggests that <italic>B. fecunda</italic> populations grown in these two distinct environments have experienced very different evolutionary histories (Song and Mitchell-Olds, <xref ref-type="bibr" rid="B27">2007</xref>; Leamy et al., <xref ref-type="bibr" rid="B14">2014</xref>). These observations and results suggest that <italic>B. fecunda</italic>, a wild relative of <italic>Arabidopsis</italic>, is an exemplary system that can be used to address a variety of ecological, evolutionary, and conservation questions.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p><bold>(A)</bold> Geographic location of two ecotypes of <italic>B. fecunda</italic>. The left panel shows locations of the populations in Montana, United States. The zoomed-in region is illustrated in the right panel. Red solid dots represent all 29 accessions of <italic>B. fecunda</italic> described in McKay et al. (<xref ref-type="bibr" rid="B20">2001</xref>) and Song and Mitchell-Olds (<xref ref-type="bibr" rid="B27">2007</xref>), of which QH and SP are highlighted with cyan and yellow hollow triangles, respectively. This figure was prepared with JMP Pro 13.0.0 (SAS Institute Inc., Cary, NC, United States). The embedded photo shows a quarter-size <italic>B. fecunda</italic> plant at the stage of flowering in the wild. <bold>(B)</bold> Distribution of 17-mer frequency for QH and SP sequencing reads. <bold>(C)</bold> A list of species to which most sequence was aligned. <bold>(D)</bold> Phylogenetic tree across 14 plant species including the SP and QH ecotypes of <italic>B. fecunda</italic>. The numbers above the branches indicate the bootstrap values.</p></caption>
<graphic xlink:href="fevo-08-550936-g0001.tif"/>
</fig>
<p>Given the importance of the two populations (EAST and WEST) with morphological and physiological traits adapted to two distinct habitats (McKay and Latta, <xref ref-type="bibr" rid="B21">2002</xref>) and the lack of genomic information, we present the <italic>de novo</italic> assemblies and annotation of the two genomes, the QH and SP ecotypes, locally adapted to the EAST and WEST regions, respectively. We also identified polymorphisms in these two highly divergent lineages of <italic>B. fecunda</italic>. Subsequently, we conducted genome annotation for each of the two genomes and identified the DNA variants (SNPs and Indels) between them. The genome assemblies and the annotation data will be valuable resources for further dissecting the genetic basis of the ecologically adaptive traits and the evolutionary mechanisms underlying local adaptation in plants.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>Methods</title>
<sec>
<title>Sample Collection and Sequencing</title>
<p>Seeds of two <italic>B. fecunda</italic> (NCBI: txid93887) ecotypes (SP and QH) were collected from their natural habitats located at Spooner (46&#x000B0; 25&#x02032; N, 114&#x000B0; 01&#x02032; W, 1,326 meter elevation, WEST) and Quartz Hill (45&#x000B0; 42&#x02032; N, 112&#x000B0; 54&#x02032; W, 2,438 meter elevation, EAST), respectively, in southwestern Montana, United States (<xref ref-type="fig" rid="F1">Figure 1A</xref>). (According to the Montana Natural Heritage Program (MNHP), there are no legal protections or limitations on research for <italic>B. fecunda</italic> [MNHP, personal communication]). Seeds of each genotype were placed on moist filter paper in a petri dish and cultured in the dark in an environmental chamber (Percival Scientific, United States) at 27&#x000B0;C. Germinated plants were grown in a chamber with 27&#x000B0;C and 12 h/12 h L/D at the University of North Carolina at Charlotte. Leaf tissues were collected, flash frozen with liquid nitrogen, and then stored at &#x02212;80&#x000B0;C until use.</p>
<p>Genomic DNA was extracted with DNeasy Plant Mini Kit (Qiagen, MD, United States) according to the manufacturer-provided Quick Start Protocol. Genomic DNA quality was determined by running the samples on an agarose gel to show the integrity of the genomic DNA. The DNA concentration was quantified using a PicoGreen assay following the manufacturer&#x00027;s protocol. The two paired-end (PE) libraries containing 100 base pair fragments were constructed following the Illumina TruSeq genomic DNA library prep protocol (Illumina Inc, San Diego, CA, United States) using genomic DNA samples. Each library was indexed and quantified using real-time PCR. The two PE libraries were pooled on an equimolar basis, and run on a single lane. Clusters were then generated to load onto a PE read flowcell and sequenced on an Illumina HiSeq 2000 platform at David H. Murdock Research Institute (Kannapolis, NC, United States). The Illumina sequencing produced 47.5 Gb of raw data.</p>
</sec>
<sec>
<title>Quality Control and Error Correction</title>
<p>The quality of the raw sequence data was assessed using FastQC software, version 0.11.5 (<ext-link ext-link-type="uri" xlink:href="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/">https://www.bioinformatics.babraham.ac.uk/projects/fastqc/</ext-link>). The graphical reports generated by FastQC were reviewed, and sequence trimming was subsequently performed as needed using Trimmomatic version 0.36 (Bolger et al., <xref ref-type="bibr" rid="B2">2014</xref>) following the manual. The following steps were performed for quality control: (1) the sequencing center-provided adaptor sequences and overrepresented sequences were removed; (2) the leading and trailing low-quality or N bases (quality below 2) were removed by setting up the LEADING and TRAILING options; (3) all reads were scanned with a 4-base sliding window and cut when the average quality per base dropped below 15 (SLIDINGWINDOW:4:15); (4) reads shorter than 36 bases after the above steps were dropped (MINLEN:36). The trimmed reads were reassessed using FastQC to verify improved data quality. Prior to conducting <italic>de novo</italic> genome assembly, we performed error correction for the QC-passing reads using the program BayesHammer (Nikolenko et al., <xref ref-type="bibr" rid="B22">2013</xref>). The error-corrected reads for each ecotype (QH: 92,280,436 reads; SP: 96,802,022 reads) were used for further study.</p>
</sec>
<sec>
<title>Genome Size and Heterozygosity Estimation</title>
<p>The genome size of the two <italic>B. fecunda</italic> genotypes was estimated using the <italic>k</italic>-mer frequency-based tools Jellyfish (Marcais and Kingsford, <xref ref-type="bibr" rid="B19">2011</xref>) and GenomeScope (Vurture et al., <xref ref-type="bibr" rid="B30">2017</xref>). Clean and error-corrected reads from two libraries were used to determine the distribution of 17-mers (Xiao et al., <xref ref-type="bibr" rid="B31">2017</xref>; Gao et al., <xref ref-type="bibr" rid="B6">2018</xref>), and frequency graphs (<xref ref-type="fig" rid="F1">Figure 1B</xref>) were generated. The results showed the highest peaks occurring at depths of 24 and 25 for QH and SP, respectively, further revealing approximately 25 and 26-fold coverage of QH and SP genomes, respectively. According to the 17-mer analysis, the genome sizes are estimated to be 188.8 Mb and 185.0 Mb for QH and SP, respectively (<xref ref-type="table" rid="T1">Table 1</xref>), which is comparable to the recently released pseudomolecule genome size (183.3 Mb) of <italic>Boechera stricta</italic> (Lee et al., <xref ref-type="bibr" rid="B15">2017</xref>), a closely related congener of <italic>B. fecunda</italic>. GenomeScope was used to analyse the heterozygosity (Vurture et al., <xref ref-type="bibr" rid="B30">2017</xref>), and it showed that both genotypes contain consistent unique (single-copy) regions of the genome assemblies (63.8&#x02013;62.7%), with fairly low heterozygosity levels (0.0155&#x02013;0.0101%), and error rates (0.0973&#x02013;0.0582%). These results indicate that our sequencing data are extensive and appropriate for genome assembly and the follow-up characterization.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Summary of sequencing reads and statistics for genome assembly and SNPs.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Genome assembly</bold></th>
<th valign="top" align="left"><bold>QH</bold></th>
<th valign="top" align="left"><bold>SP</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Estimated genome size</td>
<td valign="top" align="left">188.8 Mb</td>
<td valign="top" align="left">185.0 Mb</td>
</tr>
<tr>
<td valign="top" align="left">Unique sequence</td>
<td valign="top" align="left">63.8%</td>
<td valign="top" align="left">62.7%</td>
</tr>
<tr>
<td valign="top" align="left">Heterozygosity</td>
<td valign="top" align="left">0.0155%</td>
<td valign="top" align="left">0.101%</td>
</tr>
<tr>
<td valign="top" align="left">Repeat sequence</td>
<td valign="top" align="left">0.736%</td>
<td valign="top" align="left">0.598%</td>
</tr>
<tr>
<td valign="top" align="left" colspan="3" style="background-color:#bbbdc0"><bold>CONTIGS</bold></td>
</tr>
<tr>
<td valign="top" align="left">Total number</td>
<td valign="top" align="left">58,947</td>
<td valign="top" align="left">56,883</td>
</tr>
<tr>
<td valign="top" align="left">Largest contig</td>
<td valign="top" align="left">99,796</td>
<td valign="top" align="left">43,811</td>
</tr>
<tr>
<td valign="top" align="left">Number of contigs (&#x02265; 10,000 bp)</td>
<td valign="top" align="left">2,280</td>
<td valign="top" align="left">2,250</td>
</tr>
<tr>
<td valign="top" align="left">N50</td>
<td valign="top" align="left">4,770</td>
<td valign="top" align="left">4,826</td>
</tr>
<tr>
<td valign="top" align="left">GC (%)</td>
<td valign="top" align="left">36.08</td>
<td valign="top" align="left">35.46</td>
</tr>
<tr>
<td valign="top" align="left" colspan="3" style="background-color:#bbbdc0"><bold>SCAFFOLDS</bold></td>
</tr>
<tr>
<td valign="top" align="left">Total number</td>
<td valign="top" align="left">21,170</td>
<td valign="top" align="left">22,972</td>
</tr>
<tr>
<td valign="top" align="left">Largest scaffold</td>
<td valign="top" align="left">390,373</td>
<td valign="top" align="left">287,976</td>
</tr>
<tr>
<td valign="top" align="left">Total length</td>
<td valign="top" align="left">181,072,464</td>
<td valign="top" align="left">174,312,001</td>
</tr>
<tr>
<td valign="top" align="left">Total length (&#x02265;500 bp)</td>
<td valign="top" align="left">181,060,464</td>
<td valign="top" align="left">174,290,501</td>
</tr>
<tr>
<td valign="top" align="left">Number of scaffolds (&#x02265; 10,000 bp)</td>
<td valign="top" align="left">4,759</td>
<td valign="top" align="left">4,579</td>
</tr>
<tr>
<td valign="top" align="left">Number of scaffolds (&#x02265; 25,000 bp)</td>
<td valign="top" align="left">1,940</td>
<td valign="top" align="left">1,832</td>
</tr>
<tr>
<td valign="top" align="left">Total length (&#x02265; 50,000 bp)</td>
<td valign="top" align="left">51,184,385</td>
<td valign="top" align="left">46,294,548</td>
</tr>
<tr>
<td valign="top" align="left">N50</td>
<td valign="top" align="left">27,661</td>
<td valign="top" align="left">25,905</td>
</tr>
<tr>
<td valign="top" align="left">GC (%)</td>
<td valign="top" align="left">36.28</td>
<td valign="top" align="left">35.68</td>
</tr>
<tr>
<td valign="top" align="left" colspan="3" style="background-color:#bbbdc0"><bold>COMPLETENESS EVALUATION</bold></td>
</tr>
<tr>
<td valign="top" align="left">BUSCO evaluation</td>
<td valign="top" align="left">C:96.5% [S:94.3%, D:2.2%],<break/> F:1.3%, M:2.2%, n:1,440</td>
<td valign="top" align="left">C:96.1% [S:94.4%, D:1.7%],<break/> F:1.9%, M:2.0%, n:1,440</td>
</tr>
<tr>
<td valign="top" align="left">Reads map rates to the assembly</td>
<td valign="top" align="left">99.80%</td>
<td valign="top" align="left">99.83%</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>BUSCO, Benchmarking Universal Single-Copy Orthologs; C, Complete; S, Single-copy; D, Duplicated; F, Fragmented; M, Missing; n, gene number</italic>.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title>Assembly and Completeness Assessment</title>
<p>The genome assembly was done using the program SOAPdenovo-127mer in SOAPdenovo2 v2.04.4 (Luo et al., <xref ref-type="bibr" rid="B18">2012</xref>) with the following parameters: (1) the average insert size of the library was 290 (avg_ins = 290); (2) the PE libraries were forward-reverse sequenced (reverse_seq = 0); (3) both contig and scaffold assembly were performed (asm_flags = 3); (4) all reads were used for assembly (rd_len_cutoff = 100); (5) the order in which the reads were used while scaffolding was set to 1 (rank = 1); and (6) the minimum length aligned to contigs for a reliable read location was 32 (map_len = 32). SOAPdenovo2 was run using the recommended options with the additional use of the options -R (resolve repeats by reads) and -F (fill gaps in scaffold). To obtain an optimized <italic>k</italic>-mer for accurate assembly, different <italic>k</italic> values ranging from 33- to 83-mers in increments of 2 were used. After assessing different <italic>k</italic>-mer sizes using the quality assessment tool QUAST (Gurevich et al., <xref ref-type="bibr" rid="B9">2013</xref>), we found that 63-mers and 59-mers provided the best tradeoff for QH and SP, respectively. With the selected <italic>k</italic>-mers, the error-corrected reads were assembled into 21,170 scaffolds of 181 Mb total (N50 length = 27,661 bp) for QH and 22,972 scaffolds of 174 Mb total (N50 length = 25,905 bp) for SP. The size of both assemblies is slightly smaller than the newly released assembly (183.3 Mb) of <italic>B. stricta</italic> (Lee et al., <xref ref-type="bibr" rid="B15">2017</xref>). Statistics of sequencing depth, assembly, the percentage of reads mapping to assembly, largest and average scaffold, GC percentage, and coverage of assembled scaffolds of the two <italic>B. fecunda</italic> genomes studied are presented in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<p>To further evaluate the assembly quality, the error-corrected reads were aligned back to the respective assembled scaffolds using the algorithm BWA-MEM of the aligner BWA (Li and Durbin, <xref ref-type="bibr" rid="B16">2009</xref>), and the percentage of reads aligned was calculated using SAMtools (Li et al., <xref ref-type="bibr" rid="B17">2009</xref>). Over 99% of the corrected reads were aligned to the assembly. In addition, the program Benchmarking Universal Single-Copy Orthologs (BUSCO) (Simao et al., <xref ref-type="bibr" rid="B25">2015</xref>) [embryophyta_odb9, blast&#x0002B; v.2.3.0 (Camacho et al., <xref ref-type="bibr" rid="B3">2009</xref>), Augustus v.3.3.2 (Stanke et al., <xref ref-type="bibr" rid="B28">2008</xref>), and Hmmer v.3.2.1 (Johnson et al., <xref ref-type="bibr" rid="B12">2010</xref>)] was used to evaluate the completeness of the assemblies by comparing their predicted gene content with conserved single-copy orthologs in <italic>A. thaliana</italic>. More than 96% of core <italic>A. thaliana</italic> genes were successfully identified in both assemblies. The percentage of genes (n) for complete (C), complete and single-copy (S), complete and duplicated (D), fragmented (F), and missing (M) BUSCOs are shown in <xref ref-type="table" rid="T1">Table 1</xref>. The BUSCO result indicates a high completeness of the <italic>B. fecunda</italic> genome assemblies.</p>
</sec>
<sec>
<title>Polymorphism Estimate and Repetitive Element Annotation</title>
<p>The polymorphism estimate between the two divergent ecotypes of <italic>B. fecunda</italic> (QH vs. SP), as well as between the two congener species (<italic>B. fecunda</italic> vs. <italic>B. stricta</italic>), is shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>. The resultant genome assemblies were used for annotation of repetitive sequences and transposable elements (TEs). We adopted two strategies, <italic>de novo</italic> identification of repeat families using RepeatScout (Price et al., <xref ref-type="bibr" rid="B24">2005</xref>) and library-based search using RepeatMasker v.4.0.8 (<ext-link ext-link-type="uri" xlink:href="http://www.repeatmasker.org">http://www.repeatmasker.org</ext-link>). First, we used RepeatModeler v. 1.0.11 (<ext-link ext-link-type="uri" xlink:href="http://www.repeatmasker.org/RepeatModeler/">http://www.repeatmasker.org/RepeatModeler/</ext-link>) to identify the <italic>de novo</italic> types of repetitive elements in QH or SP ecotypes. For each genotype, the resultant repeat library was then combined with the latest updated library (RepBase RepeatMasker-Edition) from the repeat database, Repbase Update, to generate a combined genotype-based repeat library. Repbase Update is hosted at the Genetic Information Research Institute (GIRI, <ext-link ext-link-type="uri" xlink:href="https://www.girinst.org/">https://www.girinst.org/</ext-link>). The <italic>de novo</italic>, Repbase, and combined libraries were then each used in RepeatMasker to identify the type, content, number, and subfamily of TEs in QH or SP (<xref ref-type="supplementary-material" rid="SM2">Supplementary Table 2</xref>). Overall, RepeatModeler (32%) predicted twice as much repetitive sequence as Repbase (16%). The combined analysis identified approximately 35% (60 Mb) repetitive sequence in both genomes. The predominant elements were LTR elements, which accounted for an average of 9.23% (16 Mb) of the genomes.</p>
</sec>
<sec>
<title>Genome Annotation</title>
<p>The gene prediction of the two ecotypes was done by homolog-based and <italic>de novo</italic> methods. For homolog-based prediction, protein sequences from closely related plant species (<italic>Arabidopsis thaliana, Brassica rapa, Phaseolus vulgaris, Medicago truncatula, Solanum lycopersicum, Solanum tuberosum</italic>) were downloaded from the Phytozome database (<ext-link ext-link-type="uri" xlink:href="https://phytozome.jgi.doe.gov">https://phytozome.jgi.doe.gov</ext-link>) and aligned against the QH and SP assemblies using BLASTp software (Camacho et al., <xref ref-type="bibr" rid="B3">2009</xref>). Two gene annotation tools, GeneID (version 1.4.4) and Exonerate (version 2.2.0) (Slater and Birney, <xref ref-type="bibr" rid="B26">2005</xref>), were used to define gene models. Augustus (version 3.3.2) (Hoff and Stanke, <xref ref-type="bibr" rid="B11">2019</xref>) was used for <italic>de novo</italic> prediction based on the parameters trained from the <italic>Arabidopsis</italic> annotation. All the resultant annotation files were integrated into a consensus gene set using EVidenceModeler (EVM, version 1.1.1) (Haas et al., <xref ref-type="bibr" rid="B10">2008</xref>). We found 28,501 and 27,342 genes (amino acid &#x02265;50) predicted to be present in the QH and SP ecotypes, respectively. We also provide the sequences of predicted gene models, protein sequences, and the corresponding gene annotation file (gff3) in this study.</p>
<p>To obtain functional annotation of the protein-encoding genes, we performed functional annotation of the two ecotypes based on the best match from the alignments between predicted proteins and the NCBI non-redundant (nr) protein database and UniProt using BLASTp (Camacho et al., <xref ref-type="bibr" rid="B3">2009</xref>), with an <italic>E</italic>-value of 1e-5. The reason we used this relatively relaxed <italic>E</italic>-value is that <italic>B. fecunda</italic> is closely related to the well-annotated model species <italic>Arabidopsis thaliana</italic>. The output of BLASTp was loaded into BLAST2GO (Gotz et al., <xref ref-type="bibr" rid="B8">2008</xref>) for annotation including Gene Ontology (GO) mapping, InterProScan, and Kyoto Encyclopedia of Genes and Genomes (KEGG) mapping. The best hits of each annotation were used to describe the predicted genes. The top-hit species for both <italic>B. fecunda</italic> ecotypes were all from the Brassicaceae family including <italic>Camelina sativa, Arabidopsis, Capsella rubella</italic>, and <italic>Eutrema salsugineum</italic> (formerly <italic>Thellungiella halophila</italic>) (<xref ref-type="fig" rid="F1">Figure 1C</xref>). Overall, 74.4% (21,208) and 73.8% (20,173) of the total genes in QH and SP, respectively, were annotated with at least one GO term (<xref ref-type="supplementary-material" rid="SM4">Supplementary Figure 1</xref>). The statistics of the annotation are illustrated in <xref ref-type="supplementary-material" rid="SM4">Supplementary Figure 1</xref>.</p>
</sec>
<sec>
<title>Gene Family Analysis</title>
<p>Gene family analysis was performed using OrthoFinder (Emms and Kelly, <xref ref-type="bibr" rid="B5">2015</xref>) on all the protein-coding genes of QH and SP and 15 additional species (<italic>A. thaliana, B. rapa, B. stricta, Cardamine hirsuta, Citrus clementina, Capsella grandiflora, Carica papaya, Capsella rubella, Eutrema salsugineum, Glycine max, Medicago truncatula, Oryza sativa, Populus trichocarpa, Sorghum bicolor, Thellungiella parvula</italic>). Among the total identified genes in both ecotypes, 89.4 and 92.9% could be classified into 16,850 and 16,833 families in QH and SP, respectively (<xref ref-type="supplementary-material" rid="SM3">Supplementary Table 3</xref>). The number of gene families identified for both ecotypes of <italic>B. fecunda</italic> is comparable with that of the closely related species, <italic>B. stricta</italic> and <italic>A. thaliana</italic>.</p>
</sec>
<sec>
<title>Phylogenetic Analysis</title>
<p>To examine the evolutionary position of both ecotypes of <italic>B. fecunda</italic>, we downloaded entire protein sequences of 13 plant species from Phytozome V12 and constructed a phylogenetic tree. These 13 species include <italic>Boechera stricta, Capsella rubella, Capsella grandiflora, Arabidopsis thaliana, Cardamine hirsuta, Eutrema salsugineum, Thellungiella parvula, Brassica rapa, Carica papaya, Citrus clementina, Populus trichocarpa, Glycine max</italic>, and <italic>Medicago truncatula</italic>. The 167 single orthologous genes across the selected species that were identified from OrthoFinder (described above) were used to construct the tree employing the built-in program MAFFT with Maximum likelihood model (Katoh et al., <xref ref-type="bibr" rid="B13">2002</xref>). The phylogenetic relationship of the two studied ecotypes of <italic>B. fecunda</italic> and the other plant species is shown in <xref ref-type="fig" rid="F1">Figure 1D</xref>.</p>
</sec>
<sec>
<title>Data Records</title>
<p>All sequencing raw reads for both <italic>B. fecunda</italic> ecotypes have been deposited in the National Center for Biotechnology Information (NCBI, <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov">https://www.ncbi.nlm.nih.gov</ext-link>) Sequence Read Archive (SRA) (BioProject PRJNA6574120). The assembled genome sequences (QH and SP), VCF files containing all the sequence variants, the annotation files comprising predicted transcripts/CDSs/genes and proteins have been deposited in GigaDB (<ext-link ext-link-type="uri" xlink:href="http://gigadb.org/">http://gigadb.org/</ext-link>) (<italic>All data will be available upon acceptance</italic>).</p>
</sec>
<sec>
<title>Technical Validation</title>
<p>To ensure high quality of the genome assembly, we performed a series of evaluations as the experiment was carried out. We obtained good integrity and adequate concentration of the genomic DNA, which is essential for genome sequencing with desired sequencing depth and subsequent accurate genome assembly. Prior to genome assembly, raw read trimming and error correction were carried out to obtain high quality sequencing reads (QC &#x02265;30) with the adaptors and low-quality bases/reads removed, which is extremely important for creating correct kmer overlaps for <italic>de novo</italic> genome assembly. The kmer test from 33- to 83-mers allows us to select the best kmers for genome assembly, which are 63-mers and 59-mers for QH and SP, respectively. Genome completeness analysis using BUSCO/GenomeScope showed more than 96% of core <italic>A. thaliana</italic> genes were successfully identified in both assemblies, indicating a high completeness of the <italic>B. fecunda</italic> genome assembly. The completeness of the assemblies was further supported by the comparable number of gene families and protein-encoding gene models, compared with its congener species <italic>B. stricta</italic> and close relative <italic>Arabidopsis thaliana</italic>.</p>
</sec>
</sec>
<sec id="s3">
<title>Code Availability</title>
<p>Trimmomatic version (v. 0.36): LEADING 2 TRAILING 2 SLIDINGWINDOW:4:15 MINLEN:36; RepeatMasker: <italic>de novo</italic>: RepeatMasker &#x02013;pa 2 &#x02013;s &#x02013;lib lib &#x02013;dir directory &#x02013;html &#x02013;gff &#x02013;e ncbi FILE, Repbase: RepeatMasker &#x02013;pa 2 &#x02013;s &#x02013;lib Repbase_library &#x02013;dir directory &#x02013;html &#x02013;gff &#x02013;e ncbi FILE; Augustus: &#x02013;outfile=FILE &#x02013;gff3=on &#x02013;strand=both &#x02013;genemodel=partial &#x02013;species=Arabidopsis; BLASTp: blastp &#x02013;db database &#x02013;outfmt 5 &#x02013;evalue 1e&#x02013;5 &#x02013;number_alignments 5 &#x02013;num_threads 16 &#x02013;show_gis &#x02013;out FILE &#x02013;query protein.fasta; EVidenceModeler (EVM): refer to <ext-link ext-link-type="uri" xlink:href="https://evidencemodeler.github.io">https://evidencemodeler.github.io</ext-link>; GATK: version 3.70, bwa mem &#x02013;t 12, java &#x02013;jar picard.jar SortSam I=FILE O=FILE SORT_ORDER=coordinate, java &#x02013;jar picard.jar AddOrReplaceReadGroups I=FILE O=FILE ID=X1 LB=X2 PL=Illumina PU=hiseq SM=X3, java &#x02013;jar picard.jar MarkDuplicates I=FILE O=FILE METRICS_FILE=FILENAME ASSUME_SORTED=true, GenomeAnalysisTK.jar &#x02013;T IndelRealigner &#x02013;R REFERENCEFILE &#x02013;I INPUT &#x02013;O OUTFILE, GenomeAnalysisTK.jar &#x02013;T HaplotypeCaller &#x02013;R REFERENCEFILE &#x02013;I FILE &#x02013;O FILE &#x02013;stand_call_conf 30 &#x02013;min_base_quality_score 10 &#x02013;minReadsPerAlignmentStart 10 &#x02013;ploidy 2, GenomeAnalysisTK.jar &#x02013;T SelectVariants &#x02013;R FILE &#x02013;V FILE &#x02013;selectType SNP &#x02013;o FILE, GenomeAnalysisTK.jar &#x02013;T VariantFiltration &#x02013;R FILE &#x02013;V FILE &#x02013;filterExpression &#x0201C;QD&#x0003C;2.0 || MQ&#x0003C;40.0 || FS&#x0003E;60.0 || SOR&#x0003E;3.0 || MQRankSum&#x0003C;&#x02013;12.5 || ReadPosRankSum&#x0003C;&#x02013;8.0&#x0201D; &#x02013;filterName FILE &#x02013;O OUTFILE; BUSCO: version 3.0.2, 
&#x02013;i FILE &#x02013;o FILE &#x02013;m geno &#x02013;l Database &#x02013;sp FILE &#x02013;c 16; OrthoFinder (v.2.27): &#x02013;f FILE &#x02013;S diamond &#x02013;t 16 &#x02013;M msa; &#x0201C;FILE&#x0201D; in the code indicates the input or output files following the software manual.</p>
</sec>
<sec sec-type="data-availability-statement" id="s4">
<title>Data Availability Statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found at: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/bioproject/PRJNA657412">https://www.ncbi.nlm.nih.gov/bioproject/PRJNA657412</ext-link>.</p>
</sec>
<sec id="s5">
<title>Author Contributions</title>
<p>B-HS initiated the study, designed the experiment, and collected the data. HZ and IM conducted the data analysis. HZ, TM-O, and B-HS wrote the manuscript. All authors approved the final manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s6">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</body>
<back>
<sec sec-type="supplementary-material" id="s7">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fevo.2020.550936/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fevo.2020.550936/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_2.pdf" id="SM2" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_3.pdf" id="SM3" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Presentation_1.pdf" id="SM4" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Blanquart</surname> <given-names>F.</given-names></name> <name><surname>Kaltz</surname> <given-names>O.</given-names></name> <name><surname>Nuismer</surname> <given-names>S. L.</given-names></name> <name><surname>Gandon</surname> <given-names>S.</given-names></name></person-group> (<year>2013</year>). <article-title>A practical guide to measuring local adaptation</article-title>. <source>Ecol. Lett</source>. <volume>16</volume>, <fpage>1195</fpage>&#x02013;<lpage>1205</lpage>. <pub-id pub-id-type="doi">10.1111/ele.12150</pub-id><pub-id pub-id-type="pmid">23848550</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bolger</surname> <given-names>A. M.</given-names></name> <name><surname>Lohse</surname> <given-names>M.</given-names></name> <name><surname>Usadel</surname> <given-names>B.</given-names></name></person-group> (<year>2014</year>). <article-title>Trimmomatic: a flexible trimmer for Illumina sequence data</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>2114</fpage>&#x02013;<lpage>2120</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu170</pub-id><pub-id pub-id-type="pmid">24695404</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Camacho</surname> <given-names>C.</given-names></name> <name><surname>Coulouris</surname> <given-names>G.</given-names></name> <name><surname>Avagyan</surname> <given-names>V.</given-names></name> <name><surname>Ma</surname> <given-names>N.</given-names></name> <name><surname>Papadopoulos</surname> <given-names>J.</given-names></name> <name><surname>Bealer</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>BLAST plus: architecture and applications</article-title>. <source>BMC Bioinformatics</source> <volume>10</volume>:<fpage>421</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-10-421</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ellstrand</surname> <given-names>N. C.</given-names></name> <name><surname>Elam</surname> <given-names>D. R.</given-names></name></person-group> (<year>1993</year>). <article-title>Population genetic consequences of small population-size - Implications for plant conservation</article-title>. <source>Ann. Rev. Ecol. Syst.</source> <volume>24</volume>, <fpage>217</fpage>&#x02013;<lpage>242</lpage>. <pub-id pub-id-type="doi">10.1146/annurev.es.24.110193.001245</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Emms</surname> <given-names>D. M.</given-names></name> <name><surname>Kelly</surname> <given-names>S.</given-names></name></person-group> (<year>2015</year>). <article-title>OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy</article-title>. <source>Genome Biol.</source> <volume>16</volume>:<fpage>157</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-015-0721-2</pub-id><pub-id pub-id-type="pmid">26243257</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gao</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>H. B.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Chu</surname> <given-names>H. L.</given-names></name> <name><surname>Dai</surname> <given-names>D. Q.</given-names></name> <name><surname>Song</surname> <given-names>S. N.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title><italic>De novo</italic> genome assembly of the red silk cotton tree (<italic>Bombax ceiba</italic>)</article-title>. <source>Gigascience</source> <volume>7</volume>, <fpage>1</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1093/gigascience/giy051</pub-id><pub-id pub-id-type="pmid">29757382</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gehan</surname> <given-names>M. A.</given-names></name> <name><surname>Park</surname> <given-names>S.</given-names></name> <name><surname>Gilmour</surname> <given-names>S. J.</given-names></name> <name><surname>An</surname> <given-names>C. F.</given-names></name> <name><surname>Lee</surname> <given-names>C. M.</given-names></name> <name><surname>Thomashow</surname> <given-names>M. F.</given-names></name></person-group> (<year>2015</year>). <article-title>Natural variation in the C-repeat binding factor cold response pathway correlates with local adaptation of <italic>Arabidopsis</italic> ecotypes</article-title>. <source>Plant J.</source> <volume>84</volume>, <fpage>682</fpage>&#x02013;<lpage>693</lpage>. <pub-id pub-id-type="doi">10.1111/tpj.13027</pub-id><pub-id pub-id-type="pmid">26369909</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gotz</surname> <given-names>S.</given-names></name> <name><surname>Garcia-Gomez</surname> <given-names>J. M.</given-names></name> <name><surname>Terol</surname> <given-names>J.</given-names></name> <name><surname>Williams</surname> <given-names>T. D.</given-names></name> <name><surname>Nagaraj</surname> <given-names>S. H.</given-names></name> <name><surname>Nueda</surname> <given-names>M. J.</given-names></name> <etal/></person-group>. (<year>2008</year>). <article-title>High-throughput functional annotation and data mining with the Blast2GO suite</article-title>. <source>Nucleic Acids Res.</source> <volume>36</volume>, <fpage>3420</fpage>&#x02013;<lpage>3435</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkn176</pub-id><pub-id pub-id-type="pmid">18445632</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gurevich</surname> <given-names>A.</given-names></name> <name><surname>Saveliev</surname> <given-names>V.</given-names></name> <name><surname>Vyahhi</surname> <given-names>N.</given-names></name> <name><surname>Tesler</surname> <given-names>G.</given-names></name></person-group> (<year>2013</year>). <article-title>QUAST: quality assessment tool for genome assemblies</article-title>. <source>Bioinformatics</source> <volume>29</volume>, <fpage>1072</fpage>&#x02013;<lpage>1075</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btt086</pub-id><pub-id pub-id-type="pmid">23422339</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haas</surname> <given-names>B. J.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name> <name><surname>Zhu</surname> <given-names>W.</given-names></name> <name><surname>Pertea</surname> <given-names>M.</given-names></name> <name><surname>Allen</surname> <given-names>J. E.</given-names></name> <name><surname>Orvis</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2008</year>). <article-title>Automated eukaryotic gene structure annotation using evidence modeler and the program to assemble spliced alignments</article-title>. <source>Genome Biol.</source> <volume>9</volume>:<fpage>R7</fpage>. <pub-id pub-id-type="doi">10.1186/gb-2008-9-1-r7</pub-id><pub-id pub-id-type="pmid">18190707</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoff</surname> <given-names>K. J.</given-names></name> <name><surname>Stanke</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Predicting genes in single genomes with AUGUSTUS</article-title>. <source>Curr. Protoc. Bioinform.</source> <volume>65</volume>:<fpage>e57</fpage>. <pub-id pub-id-type="doi">10.1002/cpbi.57</pub-id><pub-id pub-id-type="pmid">30466165</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>L. S.</given-names></name> <name><surname>Eddy</surname> <given-names>S. R.</given-names></name> <name><surname>Portugaly</surname> <given-names>E.</given-names></name></person-group> (<year>2010</year>). <article-title>Hidden Markov model speed heuristic and iterative HMM search procedure</article-title>. <source>BMC Bioinformatics</source> <volume>11</volume>:<fpage>431</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-11-431</pub-id><pub-id pub-id-type="pmid">20718988</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Katoh</surname> <given-names>K.</given-names></name> <name><surname>Misawa</surname> <given-names>K.</given-names></name> <name><surname>Kuma</surname> <given-names>K.</given-names></name> <name><surname>Miyata</surname> <given-names>T.</given-names></name></person-group> (<year>2002</year>). <article-title>MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform</article-title>. <source>Nucleic Acids Res.</source> <volume>30</volume>, <fpage>3059</fpage>&#x02013;<lpage>3066</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkf436</pub-id><pub-id pub-id-type="pmid">12136088</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Leamy</surname> <given-names>L. J.</given-names></name> <name><surname>Lee</surname> <given-names>C. R.</given-names></name> <name><surname>Cousins</surname> <given-names>V.</given-names></name> <name><surname>Mujacic</surname> <given-names>I.</given-names></name> <name><surname>Manzaneda</surname> <given-names>A. J.</given-names></name> <name><surname>Prasad</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Large-scale adaptive divergence in <italic>Boechera fecunda</italic>, an endangered wild relative of <italic>Arabidopsis</italic></article-title>. <source>Ecol. Evol.</source> <volume>4</volume>, <fpage>3175</fpage>&#x02013;<lpage>3186</lpage>. <pub-id pub-id-type="doi">10.1002/ece3.1148</pub-id><pub-id pub-id-type="pmid">25473471</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>C. R.</given-names></name> <name><surname>Wang</surname> <given-names>B. S.</given-names></name> <name><surname>Mojica</surname> <given-names>J. P.</given-names></name> <name><surname>Mandakova</surname> <given-names>T.</given-names></name> <name><surname>Prasad</surname> <given-names>K. Y. S. K.</given-names></name> <name><surname>Goicoechea</surname> <given-names>J. L.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Young inversion with multiple linked QTLs under selection in a hybrid zone</article-title>. <source>Nat. Ecol. Evol.</source> <volume>1</volume>:<fpage>0119</fpage>. <pub-id pub-id-type="doi">10.1038/s41559-017-0119</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Durbin</surname> <given-names>R.</given-names></name></person-group> (<year>2009</year>). <article-title>Fast and accurate short read alignment with burrows-wheeler transform</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>1754</fpage>&#x02013;<lpage>1760</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp324</pub-id><pub-id pub-id-type="pmid">19451168</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Handsaker</surname> <given-names>B.</given-names></name> <name><surname>Wysoker</surname> <given-names>A.</given-names></name> <name><surname>Fennell</surname> <given-names>T.</given-names></name> <name><surname>Ruan</surname> <given-names>J.</given-names></name> <name><surname>Homer</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>2078</fpage>&#x02013;<lpage>2079</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>R. B.</given-names></name> <name><surname>Liu</surname> <given-names>B. H.</given-names></name> <name><surname>Xie</surname> <given-names>Y. L.</given-names></name> <name><surname>Li</surname> <given-names>Z. Y.</given-names></name> <name><surname>Huang</surname> <given-names>W. H.</given-names></name> <name><surname>Yuan</surname> <given-names>J. Y.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>SOAPdenovo2: an empirically improved memory-efficient short-read <italic>de novo</italic> assembler</article-title>. <source>Gigascience</source> <volume>1</volume>:<fpage>18</fpage>. <pub-id pub-id-type="doi">10.1186/2047-217X-1-18</pub-id><pub-id pub-id-type="pmid">23587118</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marcais</surname> <given-names>G.</given-names></name> <name><surname>Kingsford</surname> <given-names>C.</given-names></name></person-group> (<year>2011</year>). <article-title>A fast, lock-free approach for efficient parallel counting of occurrences of k-mers</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>764</fpage>&#x02013;<lpage>770</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr011</pub-id><pub-id pub-id-type="pmid">21217122</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McKay</surname> <given-names>J. K.</given-names></name> <name><surname>Bishop</surname> <given-names>J. G.</given-names></name> <name><surname>Lin</surname> <given-names>J. Z.</given-names></name> <name><surname>Richards</surname> <given-names>J. H.</given-names></name> <name><surname>Sala</surname> <given-names>A.</given-names></name> <name><surname>Mitchell-Olds</surname> <given-names>T.</given-names></name></person-group> (<year>2001</year>). <article-title>Local adaptation across a climatic gradient despite small effective population size in the rare sapphire rockcress</article-title>. <source>Proc. R. Soc. B Biol. Sci.</source> <volume>268</volume>, <fpage>1715</fpage>&#x02013;<lpage>1721</lpage>. <pub-id pub-id-type="doi">10.1098/rspb.2001.1715</pub-id><pub-id pub-id-type="pmid">11506685</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McKay</surname> <given-names>J. K.</given-names></name> <name><surname>Latta</surname> <given-names>R. G.</given-names></name></person-group> (<year>2002</year>). <article-title>Adaptive population divergence: markers, QTL and traits</article-title>. <source>Trends Ecol. Evol.</source> <volume>17</volume>, <fpage>285</fpage>&#x02013;<lpage>291</lpage>. <pub-id pub-id-type="doi">10.1016/S0169-5347(02)02478-3</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nikolenko</surname> <given-names>S. I.</given-names></name> <name><surname>Korobeynikov</surname> <given-names>A. I.</given-names></name> <name><surname>Alekseyev</surname> <given-names>M. A.</given-names></name></person-group> (<year>2013</year>). <article-title>BayesHammer: Bayesian clustering for error correction in single-cell sequencing</article-title>. <source>BMC Genomics</source> <volume>14</volume>:<fpage>S7</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2164-14-S1-S7</pub-id><pub-id pub-id-type="pmid">23368723</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ouborg</surname> <given-names>N. J.</given-names></name> <name><surname>Vriezen</surname> <given-names>W. H.</given-names></name></person-group> (<year>2007</year>). <article-title>An ecologist&#x00027;s guide to ecogenomics</article-title>. <source>J. Ecol.</source> <volume>95</volume>, <fpage>8</fpage>&#x02013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-2745.2006.01197.x</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Price</surname> <given-names>A. L.</given-names></name> <name><surname>Jones</surname> <given-names>N. C.</given-names></name> <name><surname>Pevzner</surname> <given-names>P. A.</given-names></name></person-group> (<year>2005</year>). <article-title><italic>De novo</italic> identification of repeat families in large genomes</article-title>. <source>Bioinformatics</source> <volume>21</volume>, <fpage>I351</fpage>&#x02013;<lpage>I358</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bti1018</pub-id><pub-id pub-id-type="pmid">15961478</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Simao</surname> <given-names>F. A.</given-names></name> <name><surname>Waterhouse</surname> <given-names>R. M.</given-names></name> <name><surname>Ioannidis</surname> <given-names>P.</given-names></name> <name><surname>Kriventseva</surname> <given-names>E. V.</given-names></name> <name><surname>Zdobnov</surname> <given-names>E. M.</given-names></name></person-group> (<year>2015</year>). <article-title>BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs</article-title>. <source>Bioinformatics</source> <volume>31</volume>, <fpage>3210</fpage>&#x02013;<lpage>3212</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv351</pub-id><pub-id pub-id-type="pmid">26059717</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Slater</surname> <given-names>G. S.</given-names></name> <name><surname>Birney</surname> <given-names>E.</given-names></name></person-group> (<year>2005</year>). <article-title>Automated generation of heuristics for biological sequence comparison</article-title>. <source>BMC Bioinformatics</source> <volume>6</volume>:<fpage>31</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-6-31</pub-id><pub-id pub-id-type="pmid">15713233</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>B. H.</given-names></name> <name><surname>Mitchell-Olds</surname> <given-names>T.</given-names></name></person-group> (<year>2007</year>). <article-title>High genetic diversity and population differentiation in <italic>Boechera fecunda</italic>, a rare relative of <italic>Arabidopsis</italic></article-title>. <source>Mol. Ecol.</source> <volume>16</volume>, <fpage>4079</fpage>&#x02013;<lpage>4088</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-294X.2007.03500.x</pub-id><pub-id pub-id-type="pmid">17784916</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stanke</surname> <given-names>M.</given-names></name> <name><surname>Diekhans</surname> <given-names>M.</given-names></name> <name><surname>Baertsch</surname> <given-names>R.</given-names></name> <name><surname>Haussler</surname> <given-names>D.</given-names></name></person-group> (<year>2008</year>). <article-title>Using native and syntenically mapped cDNA alignments to improve <italic>de novo</italic> gene finding</article-title>. <source>Bioinformatics</source> <volume>24</volume>, <fpage>637</fpage>&#x02013;<lpage>644</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btn013</pub-id><pub-id pub-id-type="pmid">18218656</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tilman</surname> <given-names>D.</given-names></name> <name><surname>Lehman</surname> <given-names>C.</given-names></name></person-group> (<year>2001</year>). <article-title>Human-caused environmental change: Impacts on plant diversity and evolution</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>98</volume>, <fpage>5433</fpage>&#x02013;<lpage>5440</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.091093198</pub-id><pub-id pub-id-type="pmid">11344290</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vurture</surname> <given-names>G. W.</given-names></name> <name><surname>Sedlazeck</surname> <given-names>F. J.</given-names></name> <name><surname>Nattestad</surname> <given-names>M.</given-names></name> <name><surname>Underwood</surname> <given-names>C. J.</given-names></name> <name><surname>Fang</surname> <given-names>H.</given-names></name> <name><surname>Gurtowski</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>GenomeScope: fast reference-free genome profiling from short reads</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>2202</fpage>&#x02013;<lpage>2204</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx153</pub-id><pub-id pub-id-type="pmid">28369201</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xiao</surname> <given-names>Y.</given-names></name> <name><surname>Xu</surname> <given-names>P. W.</given-names></name> <name><surname>Fan</surname> <given-names>H. K.</given-names></name> <name><surname>Baudouin</surname> <given-names>L.</given-names></name> <name><surname>Xia</surname> <given-names>W.</given-names></name> <name><surname>Bocs</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>The genome draft of coconut (<italic>Cocos nucifera</italic>)</article-title>. <source>Gigascience</source> <volume>6</volume>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1093/gigascience/gix095</pub-id><pub-id pub-id-type="pmid">29048487</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yeaman</surname> <given-names>S.</given-names></name></person-group> (<year>2015</year>). <article-title>Local adaptation by alleles of small effect</article-title>. <source>Am. Nat.</source> <volume>186</volume>, <fpage>S74</fpage>&#x02013;<lpage>S89</lpage>. <pub-id pub-id-type="doi">10.1086/682405</pub-id><pub-id pub-id-type="pmid">26656219</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>H. Y.</given-names></name> <name><surname>Mittal</surname> <given-names>N.</given-names></name> <name><surname>Leamy</surname> <given-names>L. J.</given-names></name> <name><surname>Barazani</surname> <given-names>O.</given-names></name> <name><surname>Song</surname> <given-names>B. H.</given-names></name></person-group> (<year>2017</year>). <article-title>Back into the wild-apply untapped genetic diversity of wild relatives for crop improvement</article-title>. <source>Evol. Appl.</source> <volume>10</volume>, <fpage>5</fpage>&#x02013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.1111/eva.12434</pub-id><pub-id pub-id-type="pmid">28035232</pub-id></citation></ref>
</ref-list>
<fn-group>
<fn fn-type="financial-disclosure"><p><bold>Funding.</bold> B-HS was supported by the University of North Carolina at Charlotte; the National Institute of General Medical Sciences of the National Institutes of Health, Award Number: R15GM122029; and North Carolina Biotechnology Center, Award Numbers: 2019-BIG-6507 and 2020-FLG-3806. TM-O was supported by grant R01 GM086496 from the National Institutes of Health.</p>
</fn>
</fn-group>
</back>
</article>