<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">733188</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2021.733188</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Data Report</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Chromosome-Scale Genome Assemblies of Two Korean Cucumber Inbred Lines</article-title>
<alt-title alt-title-type="left-running-head">Song et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">Genomes for Korean Cucumbers</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Song</surname>
<given-names>Kihwan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1390945/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shin</surname>
<given-names>Younhee</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/600188/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jung</surname>
<given-names>Myunghee</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Subramaniyam</surname>
<given-names>Sathiyamoorthy</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lee</surname>
<given-names>Keun Pyo</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Oh</surname>
<given-names>Eun-A</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jeong</surname>
<given-names>Jin Ho</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Kim</surname>
<given-names>Jeong-Gu</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/993582/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<label>
<sup>1</sup>
</label>Department of Bioresources Engineering, Sejong University, <addr-line>Seoul</addr-line>, <country>South Korea</country>
</aff>
<aff id="aff2">
<label>
<sup>2</sup>
</label>Research and Development Center, Insilicogen Inc., <addr-line>Gyeonggi-do</addr-line>, <country>South Korea</country>
</aff>
<aff id="aff3">
<label>
<sup>3</sup>
</label>Genomics Division, National Institute of Agricultural Sciences, Nongsaengmyeong, <addr-line>Jeonju</addr-line>, <country>South Korea</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/187698/overview">Yiqun Weng</ext-link>, University of Wisconsin-Madison, United&#x20;States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/336267/overview">Jarkko Saloj&#xe4;rvi</ext-link>, Nanyang Technological University, Singapore</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1345262/overview">Yuhui Wang</ext-link>, Nanjing Agricultural University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/601964/overview">Shan Wu</ext-link>, Boyce Thompson Institute, United&#x20;States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Jeong-Gu Kim, <email>jkim5aug@korea.kr</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this&#x20;work</p>
</fn>
<fn fn-type="other">
<p>This article was submitted to Plant Genomics, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>19</day>
<month>11</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>733188</elocation-id>
<history>
<date date-type="received">
<day>30</day>
<month>06</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>27</day>
<month>10</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2021 Song, Shin, Jung, Subramaniyam, Lee, Oh, Jeong and Kim.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Song, Shin, Jung, Subramaniyam, Lee, Oh, Jeong and Kim</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<kwd-group>
<kwd>Korean cucumber</kwd>
<kwd>genome</kwd>
<kwd>kimchi</kwd>
<kwd>slicer</kwd>
<kwd>Cucumis sativus</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Practicing traditional food habits and using traditional ingredients are of major importance for maintaining a diet with good nutritional value. South Korea is well known for its fermented foods, particularly <italic>banchan</italic> (fermented side dishes such as kimchi), which are deeply rooted in Korean food culture. Moreover, Korean cuisine has unique characteristics that are widely accepted to provide various health benefits (<xref ref-type="bibr" rid="B8">Kim et&#x20;al., 2016</xref>), with Korean food culture involving high consumption of vegetables due to the characteristics of its long agricultural history. According to the Korean Ministry of Agriculture definition of the standards and fundamentals of Korean food, one major constraint is that only food prepared with ingredients produced or cultivated in Korea can be considered Korean food. For example, kimchi prepared from imported Chinese cabbage cannot be considered Korean food; the same applies to other <italic>banchan</italic>. As part of the process of preserving Korean food culture, we have initiated the development of genetic resources for Korean varieties of cucumber (<italic>Cucumis sativus</italic> var. <italic>sativus</italic> L.), which is widely cultivated in Korea for both fresh and processed consumption. Cucumber originated in India and spread to other parts of the world through adaptation to various environmental factors and indigenous food habits (<xref ref-type="bibr" rid="B15">Sebastian et&#x20;al., 2010</xref>). This process has led it to become the sixth most widely cultivated vegetable crop in the world, with 2.1 million hectares under cultivation (FAOSTAT, 2020). South Korea is the 16th largest producer of cucumber in the world, with three major cultivar groups being grown: the Baekdadagi-type, Nakhap-type, and Gasi-type cultivars (<xref ref-type="bibr" rid="B14">Park et&#x20;al., 2021</xref>). 
In this study, we aimed to obtain detailed insights into the genetics of cucumber varieties by constructing chromosome-scale genome assemblies for two Korean cucumber inbred lines: JEF (semi-white Baekdadagi-type, mainly used for kimchi and other fermented foods) and KWS (Korean solid green, Nakhap-type, a slicer used fresh for salads and <italic>gimbap</italic> or Korean cold noodles).</p>
<p>As shown by previous studies of model plants and crops, a single reference genome is inadequate to capture the variation among different genetic lineages. For example, significant structural variation among maize inbred lines has been identified through analysis of multiple genomes (<xref ref-type="bibr" rid="B20">Tao et&#x20;al., 2019</xref>). Furthermore, the cost of assembling multiple genomes has been significantly reduced by third-generation sequencing technologies and computational methods, leading to the construction of chromosome-scale genome assemblies for various crops with the aim of obtaining detailed insights into gene&#x2013;trait associations (<xref ref-type="bibr" rid="B23">Yang et&#x20;al., 2019</xref>). The first version of the cucumber draft genome was released in 2009 for inbred line 9930, a lineage of the &#x2018;Chinese Long&#x2019; cultivar (<xref ref-type="bibr" rid="B7">Huang et&#x20;al., 2009</xref>); the genome has since been updated to version 3 (<xref ref-type="bibr" rid="B11">Li et&#x20;al., 2019</xref>), and a chromosome-level North American cucumber genome was published in 2012 (<xref ref-type="bibr" rid="B22">Yang et&#x20;al., 2012</xref>). Further insight into variations among and within varieties has recently been provided by the publication of information on the genome of the pickling cucumber &#x201c;Borszczagowski&#x201d; (line B10) (<xref ref-type="bibr" rid="B13">Osipowski et&#x20;al., 2020</xref>). As the chromosome-scale haploid genome assembly of &#x201c;Chinese Long&#x201d; line 9930 (2n &#x3d; 2x &#x3d; 14, haploid number 7) is readily available to the public, we used it as our reference for the construction of chromosome-scale assemblies for the two highly inbred Korean&#x20;lines.</p>
</sec>
<sec id="s2">
<title>Value of the Data</title>
<p>These new genomes will serve as an additional genetic resource that can be used as a basis and reference for more detailed study into genetic variation and domestication history among Korean cucumber varieties. In addition, they may be valuable for conducting comparative analysis among and within the species in the genus <italic>Cucumis</italic>, which could improve the genome selection process in molecular-assisted breeding.</p>
</sec>
<sec sec-type="materials|methods" id="s3">
<title>Materials and Methods</title>
<sec id="s3-1">
<title>Sample Collection and Genomic DNA Extraction</title>
<p>The inbred lines (i.e.,&#x20;JEF and KWS) were derived from the leading varieties &#x201c;Joeun Baekdadagi&#x201d; and &#x201c;Gyeoulsal-i Cheongjang&#x201d; of the former Heungnong Seeds Co. After selecting the individual that best represented the characteristics of each group in the F<sub>2</sub> populations, two inbred lines were raised through self-fertilization. The resulting line JEF is gynoecious and has semi-white fruit skin with white spines, whereas KWS is monoecious and has uniformly dark green fruit skin with black spines (<xref ref-type="fig" rid="F1">Figure&#x20;1A</xref>). The <italic>Cucumis sativus</italic> breeding line plants were directly harvested in June 2018 in a field in Wanju, Jeollabuk-do, South Korea (35&#xb0;90&#x2032; N, 127&#xb0;15&#x2032; E), near the National Institute of Agricultural Sciences. Sampled fruits are shown in <xref ref-type="fig" rid="F1">Figure&#x20;1A</xref>, and the complete workflow followed in this study is given in <xref ref-type="sec" rid="s9">Supplementary Figure&#x20;S1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Summary of the sequencing. <bold>(A)</bold>. Cucumber fruits of the varieties sequenced in this article. <bold>(B)</bold>. Genome size estimation. <bold>(C)</bold>. Phylogenetic tree from the single-copy genes along with summaries of gene gain and loss.</p>
</caption>
<graphic xlink:href="fgene-12-733188-g001.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>DNA Sequencing and <italic>de novo</italic> Genome Assembly</title>
<p>Total DNA was isolated from the samples individually according to sequencing protocols. The isolated DNA was sequenced using two different sequencing systems, PacBio Sequel (Pacific Biosciences, Menlo Park, CA, United&#x20;States) and Illumina HiSeq 2500 (Illumina, San Diego, CA, United&#x20;States), which are widely used in long- and short-read sequencing. For Illumina sequencing, DNA was prepared using the TruSeq Nano DNA Library Prep Kit (Illumina). For PacBio sequencing, DNA was prepared using the SMRTbell Express Template Prep Kit (Pacific Biosciences; catalog no. 101-357-000). The experimental procedures were fully conducted by DNA Link (Seoul, Korea), an authorized service provider in South Korea. The Illumina paired-end sequences were initially subjected to filtering of technical artifacts (i.e.,&#x20;base-calling errors [Phred quality score &#x2264; Q20]) and adapters using Trimmomatic v. 0.32 (<xref ref-type="bibr" rid="B3">Bolger et&#x20;al., 2014</xref>). These Illumina reads were used for error correction of PacBio reads in CLC Assembly Cell v. 5.1.1 (Qiagen, Hilden, Germany). The corrected PacBio reads were used to prepare the initial draft version of the cucumber genomes in FALCON-Unzip v. 0.30, a haplotype assembler program (<xref ref-type="bibr" rid="B5">Chin et&#x20;al., 2016</xref>). Finally, using the RaGOO method (<xref ref-type="bibr" rid="B1">Alonge et&#x20;al., 2019</xref>), the genome contigs were clustered and reordered according to their alignment with chromosomal units in the reference genome (&#x2018;Chinese Long&#x2019; 9930). The assembled genomes were assessed for completeness using BUSCO v. 4.1.4 with the Viridiplantae_odb10 reference dataset (<xref ref-type="bibr" rid="B16">Seppey et&#x20;al., 2019</xref>).</p>
</sec>
<sec id="s3-3">
<title>Reference Mapping of Bacterial and Organelle Genes</title>
<p>To prepare a clean reference genome, it was necessary to remove bacterial contamination and organelle genomes from the database. The complete GenBank database, which contains draft and reference genomes of bacteria and organelles (mitochondria and plastids), was used as the reference to determine which reads should be removed from the raw sequences. All reference mapping of preprocessed reads was conducted using Bowtie 2 v. 2.2.8 (<xref ref-type="bibr" rid="B9">Langmead and Salzberg, 2012</xref>). Details regarding reference paths and sizes are given in <xref ref-type="sec" rid="s9">Supplementary Table S1</xref>, and mapping statistics are given in <xref ref-type="sec" rid="s9">Supplementary Table&#x20;S2</xref>.</p>
</sec>
<sec id="s3-4">
<title>Genome Size Estimation</title>
<p>All the Illumina-preprocessed sequences from the paired-end library were subjected to genome size estimation based on <italic>k</italic>-mers. The <italic>k</italic>-mer frequencies (<italic>k</italic>-mer size &#x3d; 17) were obtained using Jellyfish v. 2.0 (<xref ref-type="bibr" rid="B12">Mar&#xe7;ais and Kingsford, 2011</xref>), and the genome size was calculated from the following formulas: genome coverage depth &#x3d; (<italic>k</italic>-mer coverage depth &#xd7; average read length)/(average read length &#x2212; <italic>k</italic>-mer size &#x2b; 1); genome size &#x3d; total base number/genome coverage depth. Here, the <italic>k</italic>-mer coverage depth is the major peak of the <italic>k</italic>-mer distribution.</p>
</sec>
<sec id="s3-5">
<title>Prediction and Classification of Repeat Regions</title>
<p>Repeat regions in the cucumber genomes were predicted using RepeatModeler (<ext-link ext-link-type="uri" xlink:href="http://www.repeatmasker.org/RepeatModeler/">www.repeatmasker.org/RepeatModeler/</ext-link>) and classified into subclasses using the repbase v. 20.08 reference database (<ext-link ext-link-type="uri" xlink:href="http://www.girinst.org/repbase/">www.girinst.org/repbase/</ext-link>) (<xref ref-type="bibr" rid="B2">Bao et&#x20;al., 2015</xref>). Finally, the repeats were masked in the genome using RepeatMasker v. 4.0.5 (<ext-link ext-link-type="uri" xlink:href="http://www.repeatmasker.org">www.repeatmasker.org</ext-link>) with RMBlastn v. 2.2.27&#x2b;. The results are shown in <xref ref-type="sec" rid="s9">Supplementary Figure&#x20;S3</xref>.</p>
</sec>
<sec id="s3-6">
<title>RNA Sequencing</title>
<p>The mRNA library from the collected samples was prepared according to the TruSeq Stranded mRNA Prep Kit protocol (Illumina). The isolated mRNA was sequenced using the Illumina sequencer (<xref ref-type="sec" rid="s9">Supplementary Tables S4</xref> and <xref ref-type="sec" rid="s9">S5</xref>).</p>
</sec>
<sec id="s3-7">
<title>Gene Prediction and Annotation</title>
<p>The genes from the cucumber draft genomes were predicted using an in-house gene prediction tool that includes three modules: an evidence-based gene modeler (EVM), an <italic>ab-initio</italic> gene modeler, and a consensus gene modeler. The Illumina-sequenced transcriptomes were mapped to the respective repeat-masked draft genomes using TopHat, and the Trinity v. 2.5.1 method was used to assemble the transcripts and mark gene structural boundaries (<xref ref-type="bibr" rid="B21">Trapnell et&#x20;al., 2012</xref>). The <italic>ab-initio</italic> gene modeler and EVM, which included Exonerate (<xref ref-type="bibr" rid="B17">Slater and Birney, 2005</xref>), Geneid and AUGUSTUS (<xref ref-type="bibr" rid="B18">Stanke et&#x20;al., 2006</xref>), were trained with several genomes. The final gene and transcript models were optimized using the consensus gene modeler and annotated using Trinotate v. 3.0.1 (<xref ref-type="bibr" rid="B4">Bryant et&#x20;al., 2017</xref>).</p>
</sec>
<sec id="s3-8">
<title>Comparative Genome Analysis</title>
<p>Total proteins from the two cucumber genomes were subjected to ortholog analysis to provide insight into the differences between cucumber proteins and those of other plants. In total, 14 genomes from Cucurbitaceae (including the two assembled in this study) were used in the ortholog analysis, with Brassicaceae as outgroups (<xref ref-type="fig" rid="F1">Figure&#x20;1C</xref> and <xref ref-type="sec" rid="s9">Supplementary Table S3</xref>). The complete protein sets of the selected genomes were subjected to ortholog analysis using OrthoMCL (<xref ref-type="bibr" rid="B10">Li et&#x20;al., 2003</xref>). The single-copy genes from the given genomes were subjected to phylogenetic tree reconstruction using BEAST (Bayesian Evolutionary Analysis Sampling Trees) to assess the evolutionary time and the degree of similarity among the given genomes (<xref ref-type="bibr" rid="B19">Suchard et&#x20;al., 2018</xref>). Furthermore, to assess the gain and loss of genes in the given genomes, the proteins were analyzed using CAFE v. 3.1 (<xref ref-type="bibr" rid="B6">Han et&#x20;al., 2013</xref>).</p>
</sec>
<sec id="s3-9">
<title>Preliminary Analysis Report</title>
<p>Initially, the sizes of the cucumber genomes were estimated to be 267.7&#xa0;MB (JEF) and 276.4&#xa0;MB (KWS) (<xref ref-type="fig" rid="F1">Figure&#x20;1B</xref>) based on &#x223c;50&#xa0;GB of short-read sequences (<xref ref-type="table" rid="T1">Table&#x20;1A</xref> and <xref ref-type="sec" rid="s9">Supplementary Table S4</xref>), but 230.8&#xa0;MB (JEF) and 231.0&#xa0;MB (KWS) based on the representative scaffolds assembled from &#x223c;30&#xa0;GB of error-corrected long-read sequences (<xref ref-type="table" rid="T1">Table&#x20;1A,B</xref>). The N50s of the assembled genomes were 30.6&#xa0;MB (JEF) and 31.3&#xa0;MB (KWS), and 40% of the assembled contigs were covered by repeats, in which the long terminal repeat (LTR) elements dominated, accounting for 36% of contigs (<xref ref-type="sec" rid="s9">Supplementary Figure S3</xref>). In total, 25,968 genes were predicted from the JEF genome and 26,011 from KWS, with average sizes of 4,111 and 4,114 bases, respectively, and BUSCO completeness scores of 97.88% and 98.35%, respectively (<xref ref-type="table" rid="T1">Table&#x20;1C</xref>). Finally, 66.54% of JEF genes and 65.96% of KWS genes had homologous sequences in GenBank, while 60.25% of JEF genes and 59.82% of KWS genes had gene ontology descriptions (<xref ref-type="table" rid="T1">Table&#x20;1D</xref>). The two genomes were scaffolded onto the reference &#x201c;Chinese Long&#x201d; 9930 genome using the RaGOO method. Overall, these genome assemblies have &#x223c;5&#xa0;MB of additional bases compared with the reference and similar BUSCO completeness scores, indicating that they are of good quality. Additionally, an average of 99% of both DNA and RNA sequences were mapped to the reference assembly as an additional measure to ensure the quality of the new assemblies (<xref ref-type="sec" rid="s9">Supplementary Figure S2</xref>). 
The ortholog analysis revealed genome-specific genes, as well as gain and loss of genes, in the selected cucumber genomes (<xref ref-type="fig" rid="F1">Figure&#x20;1C</xref> and <xref ref-type="sec" rid="s9">Supplementary Figure S4</xref>). In addition, the RNA samples were collected from five different developmental stages, revealing that both genomes contain genes expressed differentially in different organs or at different stages (<xref ref-type="sec" rid="s9">Supplementary Figures S5</xref> and <xref ref-type="sec" rid="s9">S6</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Summary of the sequencing to annotation of the cucumber draft genomes along with the reference.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">JEF</th>
<th align="center">KWS</th>
<th align="center">Cucumber_9930_v3 (GCF_000004075.3)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="4" align="left">(A) Sequencing</td>
</tr>
<tr>
<td align="left">&#x2003;Short Reads</td>
<td align="center">72.7&#xa0;GB (315.10X)</td>
<td align="center">75.9&#xa0;GB (328.69X)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">&#x2003;Long Reads</td>
<td align="center">31.7&#xa0;GB (137.48X)</td>
<td align="center">37.0&#xa0;GB (160.38X)</td>
<td align="left"/>
</tr>
<tr>
<td colspan="4" align="left">(B) Assembly</td>
</tr>
<tr>
<td align="left">&#x2003;Genome size estimation</td>
<td align="center">267,736,921</td>
<td align="center">276,372,239</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="left">&#x2003;Total length, bp</td>
<td align="center">230,754,408</td>
<td align="center">231,006,969</td>
<td align="center">226,211,662</td>
</tr>
<tr>
<td align="left">&#x2003;Total length/Estimation</td>
<td align="center">86.19%</td>
<td align="center">83.59%</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="left">&#x2003;No. of contigs</td>
<td align="center">7 Chr &#x2b;54 unplaced</td>
<td align="center">7 Chr &#x2b;57 unplaced</td>
<td align="center">7 Chr &#x2b;77 unplaced</td>
</tr>
<tr>
<td align="left">&#x2003;Scaffold N50 (Contig N50)</td>
<td align="center">30,569,742 (7,362,017)</td>
<td align="center">31,270,087 (8,654,608)</td>
<td align="center">31,125,843</td>
</tr>
<tr>
<td align="left">&#x2003;N (%)</td>
<td align="center">0.01%</td>
<td align="center">0.01%</td>
<td align="center">0.02%</td>
</tr>
<tr>
<td align="left">&#x2003;GC (%)</td>
<td align="center">33.23%</td>
<td align="center">33.25%</td>
<td align="center">32.82%</td>
</tr>
<tr>
<td align="left">&#x2003;Repeats (MB)</td>
<td align="center">93.47 (40.50%)</td>
<td align="center">94.41 (40.87%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">&#x2003;Repeats against reference (9930)</td>
<td align="center">91.00 (39.44%)</td>
<td align="center">91.69 (39.69%)</td>
<td align="center">84.02 (37.14%)&#x2013;in our method</td>
</tr>
<tr>
<td align="left">&#x2003;BUSCO</td>
<td align="center">99.06%</td>
<td align="center">98.82%</td>
<td align="center">98.82%</td>
</tr>
<tr>
<td colspan="4" align="left">(C) Structural annotations</td>
</tr>
<tr>
<td align="left">&#x2003;No. of genes</td>
<td align="center">25,968</td>
<td align="center">26,011</td>
<td align="center">24,317</td>
</tr>
<tr>
<td align="left">&#x2003;Average gene length (bp)</td>
<td align="center">4,111.58</td>
<td align="center">4,114.07</td>
<td align="center">4,068.49</td>
</tr>
<tr>
<td align="left">&#x2003;BUSCO (Viridiplantae)</td>
<td align="center">97.88%</td>
<td align="center">98.35%</td>
<td align="center">100.00%</td>
</tr>
<tr>
<td colspan="4" align="left">(D) Functional annotations</td>
</tr>
<tr>
<td align="left">&#x2003;No. hits</td>
<td align="center">8,690 (33.46%)</td>
<td align="center">8,853 (34.04%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">&#x2003;Blast hits</td>
<td align="center">17,278 (66.54%)</td>
<td align="center">17,158 (65.96%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">&#x2003;Gene Ontology</td>
<td align="center">15,645 (60.25%)</td>
<td align="center">15,561 (59.82%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">&#x2003;KEGG</td>
<td align="center">3,725 (14.34%)</td>
<td align="center">3,718 (14.29%)</td>
<td align="left"/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
</body>
<back>
<sec id="s4">
<title>Data Availability Statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/<xref ref-type="sec" rid="s9">Supplementary Material</xref>.</p>
</sec>
<sec id="s5">
<title>Author Contributions</title>
<p>YS, MJ, and SS: genome assembly and annotation. KS and SS: manuscript preparation. KL, E-AO, JJ, and J-GK: sampling and sequencing. KS and J-GK: funding and modeling the&#x20;study.</p>
</sec>
<sec id="s6">
<title>Funding</title>
<p>This work was supported by the Cooperative Research for Agriculture Science and Technology Development (PJ01343202) of the Rural Development Administration, Republic of Korea.</p>
</sec>
<sec sec-type="COI-statement" id="s7">
<title>Conflict of Interest</title>
<p>YS, MJ and SS were employed by Insilicogen Inc.</p>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s9">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2021.733188/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2021.733188/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Presentation1.PPTX" id="SM1" mimetype="application/PPTX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.XLSX" id="SM2" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alonge</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Soyk</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ramakrishnan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Goodwin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sedlazeck</surname>
<given-names>F. J.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>RaGOO: Fast and Accurate Reference-Guided Scaffolding of Draft Genomes</article-title>. <source>Genome Biol.</source> <volume>20</volume>, <fpage>224</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-019-1829-6</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Kojima</surname>
<given-names>K. K.</given-names>
</name>
<name>
<surname>Kohany</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Repbase Update, a Database of Repetitive Elements in Eukaryotic Genomes</article-title>. <source>Mobile DNA</source> <volume>6</volume>, <fpage>11</fpage>. <pub-id pub-id-type="doi">10.1186/s13100-015-0041-9</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bolger</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Lohse</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Usadel</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Trimmomatic: a Flexible Trimmer for Illumina Sequence Data</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>2114</fpage>&#x2013;<lpage>2120</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu170</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bryant</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Johnson</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>DiTommaso</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Tickle</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Couger</surname>
<given-names>M. B.</given-names>
</name>
<name>
<surname>Payzin-Dogru</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>A Tissue-Mapped Axolotl De Novo Transcriptome Enables Identification of Limb Regeneration Factors</article-title>. <source>Cell Rep.</source> <volume>18</volume>, <fpage>762</fpage>&#x2013;<lpage>776</lpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2016.12.063</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chin</surname>
<given-names>C.-S.</given-names>
</name>
<name>
<surname>Peluso</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Sedlazeck</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>Nattestad</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Concepcion</surname>
<given-names>G. T.</given-names>
</name>
<name>
<surname>Clum</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Phased Diploid Genome Assembly with Single-Molecule Real-Time Sequencing</article-title>. <source>Nat. Methods</source> <volume>13</volume>, <fpage>1050</fpage>&#x2013;<lpage>1054</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.4035</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>M. V.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>G. W. C.</given-names>
</name>
<name>
<surname>Lugo-Martinez</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hahn</surname>
<given-names>M. W.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Estimating Gene Gain and Loss Rates in the Presence of Error in Genome Assembly and Annotation Using CAFE 3</article-title>. <source>Mol. Biol. Evol.</source> <volume>30</volume>, <fpage>1987</fpage>&#x2013;<lpage>1997</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mst100</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>The Genome of the Cucumber, <italic>Cucumis sativus</italic> L</article-title>. <source>Nat. Genet.</source> <volume>41</volume>, <fpage>1275</fpage>&#x2013;<lpage>1281</lpage>. <pub-id pub-id-type="doi">10.1038/ng.475</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>Y. S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>S. A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Korean Diet: Characteristics and Historical Background</article-title>. <source>J.&#x20;Ethnic Foods</source> <volume>3</volume>, <fpage>26</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1016/j.jef.2016.03.002</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langmead</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Salzberg</surname>
<given-names>S. L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Fast Gapped-Read Alignment with Bowtie 2</article-title>. <source>Nat. Methods</source> <volume>9</volume>, <fpage>357</fpage>&#x2013;<lpage>359</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Stoeckert</surname>
<given-names>C. J.</given-names>
<suffix>Jr.</suffix>
</name>
<name>
<surname>Roos</surname>
<given-names>D. S.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>OrthoMCL: Identification of Ortholog Groups for Eukaryotic Genomes</article-title>. <source>Genome Res.</source> <volume>13</volume>, <fpage>2178</fpage>&#x2013;<lpage>2189</lpage>. <pub-id pub-id-type="doi">10.1101/gr.1224503</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>A Chromosome-Scale Genome Assembly of Cucumber (<italic>Cucumis sativus</italic> L.)</article-title>. <source>GigaScience</source> <volume>8</volume>. <pub-id pub-id-type="doi">10.1093/gigascience/giz072</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mar&#xe7;ais</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kingsford</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>A Fast, Lock-free Approach for Efficient Parallel Counting of Occurrences of K-Mers</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>764</fpage>&#x2013;<lpage>770</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr011</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Osipowski</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Pawe&#x142;kowicz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wojcieszek</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Skarzy&#x144;ska</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Przybecki</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Pl&#x105;der</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A High-Quality Cucumber Genome Assembly Enhances Computational Comparative Genomics</article-title>. <source>Mol. Genet. Genomics</source> <volume>295</volume>, <fpage>177</fpage>&#x2013;<lpage>193</lpage>. <pub-id pub-id-type="doi">10.1007/s00438-019-01614-3</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Park</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jung</surname>
<given-names>J.&#x20;K.</given-names>
</name>
<name>
<surname>Shim</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>M. Y.</given-names>
</name>
<name>
<surname>Sim</surname>
<given-names>S. C.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Genetic Diversity Assessment and Cultivar Identification of Cucumber (<italic>Cucumis sativus</italic> L.) Using the Fluidigm Single Nucleotide Polymorphism Assay</article-title>. <source>Plants (Basel)</source> <volume>10</volume>. <pub-id pub-id-type="doi">10.3390/plants10020395</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sebastian</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Schaefer</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Telford</surname>
<given-names>I. R. H.</given-names>
</name>
<name>
<surname>Renner</surname>
<given-names>S. S.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Cucumber (<italic>Cucumis sativus</italic>) and Melon (<italic>C. melo</italic>) Have Numerous Wild Relatives in Asia and Australia, and the Sister Species of Melon Is from Australia</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>107</volume>, <fpage>14269</fpage>&#x2013;<lpage>14273</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1005338107</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Seppey</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Manni</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zdobnov</surname>
<given-names>E. M.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>BUSCO: Assessing Genome Assembly and Annotation Completeness</article-title>,&#x201d; in <source>Gene Prediction: Methods and Protocols</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Kollmar</surname>
<given-names>M.</given-names>
</name>
</person-group> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Springer New York</publisher-name>), <fpage>227</fpage>&#x2013;<lpage>245</lpage>. <pub-id pub-id-type="doi">10.1007/978-1-4939-9173-0_14</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Slater</surname>
<given-names>G. S. C.</given-names>
</name>
<name>
<surname>Birney</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Automated Generation of Heuristics for Biological Sequence Comparison</article-title>. <source>BMC Bioinformatics</source> <volume>6</volume>, <fpage>31</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-6-31</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stanke</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sch&#xf6;ffmann</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Morgenstern</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Waack</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Gene Prediction in Eukaryotes with a Generalized Hidden Markov Model that Uses Hints from External Sources</article-title>. <source>BMC Bioinformatics</source> <volume>7</volume>, <fpage>62</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-7-62</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Suchard</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Lemey</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Baele</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ayres</surname>
<given-names>D. L.</given-names>
</name>
<name>
<surname>Drummond</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Rambaut</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Bayesian Phylogenetic and Phylodynamic Data Integration Using BEAST 1.10</article-title>. <source>Virus Evol.</source> <volume>4</volume>. <pub-id pub-id-type="doi">10.1093/ve/vey016</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jordan</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Mace</surname>
<given-names>E. S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Crop Genomics Goes beyond a Single Reference Genome</article-title>. <source>Trends Plant Sci.</source> <volume>24</volume>, <fpage>1072</fpage>&#x2013;<lpage>1074</lpage>. <pub-id pub-id-type="doi">10.1016/j.tplants.2019.10.001</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Trapnell</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Roberts</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Goff</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Pertea</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Kelley</surname>
<given-names>D. R.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Differential Gene and Transcript Expression Analysis of RNA-Seq Experiments with TopHat and Cufflinks</article-title>. <source>Nat. Protoc.</source> <volume>7</volume>, <fpage>562</fpage>&#x2013;<lpage>578</lpage>. <pub-id pub-id-type="doi">10.1038/nprot.2012.016</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Koo</surname>
<given-names>D. H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Luan</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Havey</surname>
<given-names>M. J.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Chromosome Rearrangements during Domestication of Cucumber as Revealed by High-Density Genetic Mapping and Draft Genome Assembly</article-title>. <source>Plant J.</source> <volume>71</volume>, <fpage>895</fpage>&#x2013;<lpage>906</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-313x.2012.05017.x</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Gui</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Genome Assembly of a Tropical Maize Inbred Line Provides Insights into Structural Variation and Crop Improvement</article-title>. <source>Nat. Genet.</source> <volume>51</volume>, <fpage>1052</fpage>&#x2013;<lpage>1059</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-019-0427-6</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>