<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2022.984422</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Whole-genome resequencing reveals the origin of tea in Lincang</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Lei</surname>
<given-names>Yahui</given-names>
</name>
<xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<xref rid="fn0001" ref-type="author-notes"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1879714/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Ling</given-names>
</name>
<xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<xref rid="fn0001" ref-type="author-notes"><sup>&#x2020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Duan</surname>
<given-names>Shengchang</given-names>
</name>
<xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ning</surname>
<given-names>Siqi</given-names>
</name>
<xref rid="aff3" ref-type="aff"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Dawei</given-names>
</name>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1911743/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Zijun</given-names>
</name>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xiang</surname>
<given-names>Guisheng</given-names>
</name>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Ling</given-names>
</name>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Chunping</given-names>
</name>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Shiyu</given-names>
</name>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Shuangyan</given-names>
</name>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ye</surname>
<given-names>Shuang</given-names>
</name>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1697586/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kui</surname>
<given-names>Ling</given-names>
</name>
<xref rid="aff5" ref-type="aff"><sup>5</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/371058/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Singh</surname>
<given-names>Pratiksha</given-names>
</name>
<xref rid="aff6" ref-type="aff"><sup>6</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1072522/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Sheng</surname>
<given-names>Jun</given-names>
</name>
<xref rid="aff7" ref-type="aff"><sup>7</sup></xref>
<xref rid="aff8" ref-type="aff"><sup>8</sup></xref>
<xref rid="c002" ref-type="corresp"><sup>&#x002A;</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Dong</surname>
<given-names>Yang</given-names>
</name>
<xref rid="aff7" ref-type="aff"><sup>7</sup></xref>
<xref rid="aff8" ref-type="aff"><sup>8</sup></xref>
<xref rid="c001" ref-type="corresp"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/694956/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>College of Food Science and Technology, Yunnan Agricultural University</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Nowbio Biotechnology Company</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Experimental Middle School of Yunnan Normal University</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Yunnan Agricultural University Applied Genomics Technology Laboratory, School of Biological Big Data, Yunnan Agricultural University</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>Shenzhen Qianhai Shekou Free Trade Zone Hospital</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country></aff>
<aff id="aff6"><sup>6</sup><institution>State Key Laboratory of Non-Food Biomass and Enzyme Technology, Guangxi Academy of Sciences</institution>, <addr-line>Nanning, Guangxi</addr-line>, <country>China</country></aff>
<aff id="aff7"><sup>7</sup><institution>State Key Laboratory for Conservation and Utilization of Bio-Resources in Yunnan, Yunnan Agricultural University</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<aff id="aff8"><sup>8</sup><institution>Yunnan Research Institute for Local Plateau Agriculture and Industry</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<author-notes>
<fn id="fn0002" fn-type="edited-by"><p>Edited by: Jihong Hu, Northwest A&#x0026;F University, China</p></fn>
<fn id="fn0003" fn-type="edited-by"><p>Reviewed by: Peng Cui, Agricultural Genomics Institute at Shenzhen (CAAS), China; Liangsheng Zhang, Zhejiang University, China</p></fn>
<corresp id="c001">&#x002A;Correspondence: Yang Dong, <email>loyalyang@163.com</email></corresp>
<corresp id="c002">Jun Sheng, <email>shengjun@dongyang-lab.org</email></corresp>
<fn id="fn0001" fn-type="equal"><p><sup>&#x2020;</sup>These authors have contributed equally to this work</p></fn>
<fn id="fn0004" fn-type="other"><p>This article was submitted to Plant Bioinformatics, a section of the journal Frontiers in Plant Science</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>15</day>
<month>09</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>984422</elocation-id>
<history>
<date date-type="received">
<day>02</day>
<month>07</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>22</day>
<month>08</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2022 Lei, Yang, Duan, Ning, Li, Wang, Xiang, Yang, Wang, Zhang, Zhang, Ye, Kui, Singh, Sheng and Dong.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Lei, Yang, Duan, Ning, Li, Wang, Xiang, Yang, Wang, Zhang, Zhang, Ye, Kui, Singh, Sheng and Dong</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Phylogeographic, population genetic and diversity analyses are crucial for local tea resource conservation and breeding programs. Lincang in Yunnan has been known as the possible place of domestication for tea worldwide, yet its genetic makeup and unique Lincang origin are little understood. Here, we reported a large-scale whole-genome resequencing based population genomic analysis in eight main tea-producing areas of Lincang in Yunnan (1,350 accessions), and the first comprehensive map of tea genome variation in Lincang was constructed. Based on the population structure, the tea samples in Lincang were divided into three subgroups, and we inferred that Xigui and Nahan Tea Mountain in Linxiang, Baiying Mountain Ancient Tea Garden in Yun, and Jinxiu Village of Xiaowan Town in Fengqing, which are the birthplaces of the three subgroups, were all likely to be origin centers of Lincang tea. Meanwhile, the historical population size analysis showed that similar evolutionary patterns were observed for the three subgroups of Lincang. It was also observed that hybridization among the eight areas of Lincang was noticeable, resulting in insignificant genetic differentiation between geographical populations and low genetic diversity. The findings of this study clarified the genetic makeup and evolutionary traits of the local population of tea, which gave some insight into the development of Lincang tea.</p>
</abstract>
<kwd-group>
<kwd><italic>Camellia sinensis</italic></kwd>
<kwd>SNPs</kwd>
<kwd>whole genome re-sequencing</kwd>
<kwd>origin</kwd>
<kwd>population structure</kwd>
</kwd-group>
<contract-num rid="cn3">202002AA100007</contract-num>
<contract-num rid="cn3">2019ZG00908</contract-num>
<contract-num rid="cn4">2019008</contract-num>
<contract-sponsor id="cn1">Digitalization of Biological Resource Project</contract-sponsor>
<contract-sponsor id="cn2">Yunnan Eco-friendly Food International Cooperation Research Center</contract-sponsor>
<contract-sponsor id="cn3">Yunnan Provincial Science and Technology Department<named-content content-type="fundref-id">10.13039/501100008871</named-content>
</contract-sponsor>
<contract-sponsor id="cn4">Development and Application of Biological Resource Digitalization</contract-sponsor>
<counts>
<fig-count count="6"/>
<table-count count="4"/>
<equation-count count="0"/>
<ref-count count="48"/>
<page-count count="14"/>
<word-count count="8190"/>
</counts>
</article-meta>
</front>
<body>
<sec id="sec1" sec-type="intro">
<title>Introduction</title>
<p>The tea plant (2<italic>n</italic>&#x2009;=&#x2009;30; <italic>Camellia sinensis</italic>), a member of the genus <italic>Camellia</italic> (<italic>Theaceae</italic>), is one of the world&#x2019;s important economic crops (<xref ref-type="bibr" rid="ref26">Mondal et al., 2004</xref>). It is believed that the origin of the tea plant can be traced back to southwestern China, including Yunnan Province and the adjacent areas, from which the tea spread around the world mainly through the seas and lands of the Silk Road (<xref ref-type="bibr" rid="ref18">Kingdom-Ward, 1950</xref>; <xref ref-type="bibr" rid="ref13">Hasimoto, 2001</xref>; <xref ref-type="bibr" rid="ref4">Chen et al., 2005</xref>; <xref ref-type="bibr" rid="ref37">Willson and Clifford, 2012</xref>). In southwest China, tea plants are mainly distributed in the middle and lower reaches of the Lancang River basin, which is the upper reaches of the Mekong River. The diversity of ecological characteristics has given birth to the rich resources of ancient tea. Particularly, Lincang belongs to the second-largest tea-producing region in the Yunnan province, with the world&#x2019;s oldest tea tree growing for 3,200&#x2009;years, of which the Bingdao Tea Mountain, Daxue Mountain, Baiying Mountain, Nahan, and Xigui Tea Mountain are abundant in ancient tea tree resources (<xref ref-type="bibr" rid="ref27">Ochanda et al., 2015</xref>).</p>
<p>In the study of the genetic evolution of tea trees, the &#x201C;two-origin theory&#x201D; of tea tree is the most common (<xref ref-type="bibr" rid="ref43">Zhang et al., 2018</xref>), researchers infer that the tea originated from the two major classifications of tea, <italic>C. sinensis</italic> var. <italic>assamica</italic> (CSA) in temperate regions and <italic>C. sinensis</italic> var. <italic>sinensis</italic> (CSS) in tropical and subtropical regions. The large-leaf tea tree (CSA) originated in southwestern China or the Assam region of India, whereas the small-leaf tea originated in the south-eastern region of China (<xref ref-type="bibr" rid="ref43">Zhang et al., 2018</xref>). For CSA, it is noted to have appeared in subtropical areas, such as eastern Yunnan, during the Miocene period of the Tertiary. The spread to the northeast in the direction of low relief began at the time of the collision of the Indian and Asian plates, after which the present crescent-shaped distribution of wild tea in Sichuan and Chongqing was formed (<xref ref-type="bibr" rid="ref17">Kan, 2013</xref>). However, as the main domestication center of the tea plant in Lincang of Yunnan-Guizhou Plateau, where the world&#x2019;s oldest tea trees are located, a great deal of uncertainty remains about the genetic structure of tea among the different tea-producing regions. Therefore, the study of genetic variation among different tea-producing regions in Lincang is essential for tea plant diversity.</p>
<p>To get a comprehension of the genetic structure and distribution of tea in Lincang, single-nucleotide polymorphism (SNP) markers, which were identified through whole-genome resequencing (WGR) at the population level, were applied. Currently, WGR has been widely used in rice (<xref ref-type="bibr" rid="ref15">Huang et al., 2012</xref>), maize (<xref ref-type="bibr" rid="ref16">Hufford et al., 2012</xref>), grapes (<xref ref-type="bibr" rid="ref22">Liang et al., 2019</xref>), apples (<xref ref-type="bibr" rid="ref10">Duan et al., 2017</xref>), and other important crops and economic crops. Based on WGR, <xref ref-type="bibr" rid="ref30">Ren et al. (2021)</xref> studied 110 cannabis (<italic>Cannabis sativa</italic>) germplasm resources from around the world and identified candidate genes associated with differentiation traits during the domestication of hemp-type and drug-type cultivars of cannabis, revealing the domestication origin and evolutionary history of cannabis. In addition, <xref ref-type="bibr" rid="ref45">Zhao et al. (2021)</xref> analyzed 427 Moso bamboo from 15 representative geographical areas, and constructed a genomic variation map of Moso bamboo for population evolutionary analysis, revealing the population diversity of this asexually reproducing species, etc. The previous study made it possible to identify that resequencing analysis can be used as a method to explore the origin and population structure of the tea plant in Lincang. 
Meanwhile, the genomes of two major variants of tea, <italic>viz.</italic>, big-leaf tea [Yun Kang No.10 (<xref ref-type="bibr" rid="ref39">Xia et al., 2017</xref>)] and small-leaf tea [Shuchazao (<xref ref-type="bibr" rid="ref35">Wei et al., 2018</xref>; <xref ref-type="bibr" rid="ref38">Xia et al., 2020</xref>); Biyun (<xref ref-type="bibr" rid="ref42">Zhang et al., 2020a</xref>); and Longjing 43 (<xref ref-type="bibr" rid="ref33">Wang et al., 2020</xref>)], as well as wild tea [DASZ (<xref ref-type="bibr" rid="ref44">Zhang et al., 2020b</xref>)], have been sequenced to date, providing a solid foundation for the large-scale application of WGR.</p>
<p>Herein, we collected and sequenced samples of 1,235 tea accessions from Lincang in the Yunnan province and 115 tea accessions from other regions in China. With the identified single-nucleotide polymorphisms (SNPs), we divided the tea samples from eight tea regions in Lincang into three subgroups based on the population structure, and inferred three possible origins in Lincang. In addition, it was also observed that hybridization among the eight areas of Lincang was noticeable, resulting in insignificant genetic differentiation between geographical populations and low genetic diversity. A large number of variations identified not only provide deeper insights into the genetic evolution and structural characteristics of the local tea populations but also lay a foundation for conservation and breeding programs of tea resources in Lincang, Yunnan province, China.</p>
</sec>
<sec id="sec2" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="sec3">
<title>Sample collection</title>
<p>A total of 1,350 tea accessions were collected at diverse sites from Lincang, Yunnan, and other regions of China during the period from 2019 to 2020. Among them, 31 samples were collected in Cangyuan, 115 in Fengqing, 78 in Gengma, 235 in Linxiang, 291 in Shuangjiang, 187 in Yongde, 217 in Yun, 81 in Zhenkang, and 115 in Others. The Others group was used as a control for the samples of the eight geographic populations, so the sampling range of the Others group was widely dispersed (<xref rid="fig1" ref-type="fig">Figure 1</xref>: black dot). Further, the accession KM6 (<italic>Camellia cuspidata</italic>) was collected to use as an outgroup during phylogenetic analysis. The details about the sampled populations are presented in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref> and the geographical distribution of these points is depicted in <xref rid="fig1" ref-type="fig">Figure 1</xref>.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>The geographical distribution of tea accessions was assessed in the present study.</p>
</caption>
<graphic xlink:href="fpls-13-984422-g001.tif"/>
</fig>
</sec>
<sec id="sec4">
<title>DNA isolation, sequencing and processing of raw read</title>
<p>The total DNA was extracted using the DNA secure plant kit (TIANGEN, Beijing), following the manufacturer&#x2019;s protocol. Around 2&#x2009;&#x03BC;g of the extracted DNA was used to construct the sequencing library for each accession using the NEBNext Ultra DNA Library Prep Kit (NEB Inc., America), following the manufacturer&#x2019;s instructions. Paired-end sequencing libraries with an insert size of approximately 400&#x2009;bp were sequenced on the Illumina NovaSeq 6000 platform. We used fastp v0.12.2 (<xref ref-type="bibr" rid="ref5">Chen et al., 2018</xref>) for the removal of adaptor contamination, poly-N, and low-quality reads (reads in which &#x003E;40% of bases had a Phred score &#x2264;20). Further, the paired-end reads with sequence lengths below 70&#x2009;bp were filtered out. Thus, only high-quality cleaned reads were retained for downstream analysis.</p>
</sec>
<sec id="sec5">
<title>Variant calling and annotation</title>
<p>Paired-end reads were mapped to the reference genome of <italic>Camellia sinensis</italic> (Shuchazao; <xref ref-type="bibr" rid="ref35">Wei et al., 2018</xref>) through BWA v0.7.17-r1188 (<xref ref-type="bibr" rid="ref20">Li and Durbin, 2009</xref>) using default parameters. Conversion of SAM to BAM and exclusion of unmapped and multi-mapped reads were performed through SAMtools v1.3.1 (<xref ref-type="bibr" rid="ref21">Li et al., 2009</xref>). Further, the duplicated reads were filtered out using Picard v2.1.1.<xref rid="fn0005" ref-type="fn"><sup>1</sup></xref></p>
<p>After BWA alignment, the reads around indels were realigned. Realignment was performed with GATK 3.3-0-g37228af<xref rid="fn0006" ref-type="fn"><sup>2</sup></xref> (<xref ref-type="bibr" rid="ref25">McKenna et al., 2010</xref>) in two steps. First, we used the RealignerTargetCreator package to identify regions where realignment was needed, followed by a realignment of reads to these regions using IndelRealigner, and created a realigned BAM file for each accession. Then we detected the variation of each sample and obtained the original variation set file (gVCF format) through GATK HaplotypeCaller, and gVCF files were further integrated to obtain population variation data.</p>
<p>The SNP filter expression parameters were set as: QD&#x003C;2.0 || MQ&#x003C;40.0 || FS&#x003E;60.0 || SOR&#x003E;5.0 || MQRankSum &#x003C; &#x2212;12.5 || ReadPosRankSum &#x003C; &#x2212;8.0 || QUAL &#x003C;30. The InDel filter expression parameters were set as: QD&#x003C;2.0 || ReadPosRankSum &#x003C; &#x2212;20.0 || InbreedingCoeff &#x003C; &#x2212;0.8 || FS&#x003E;200.0 || SOR&#x003E;10.0 || QUAL&#x003C;30 (<xref ref-type="bibr" rid="ref8">DePristo et al., 2011</xref>). Only insertions and deletions shorter than or equal to 40&#x2009;bp were considered. Indels and SNPs that were non-bi-allelic, had &#x003E;50% missing calls, or had MAF&#x2009;&#x003C;&#x2009;0.005 were removed, which yielded the basic set. SNPs that had MAF&#x2009;&#x003C;&#x2009;0.05, were non-bi-allelic, or had &#x003E;50% missing calls were further removed for phylogenetic tree structure, genetic diversity analysis, LD decay, PCA and population structure analyses (the core set). The annotation of SNPs and InDels was performed through ANNOVAR v2015-12-14 (<xref ref-type="bibr" rid="ref34">Wang et al., 2010</xref>) using tea genome as a reference.</p>
</sec>
<sec id="sec6">
<title>Phylogenetic analysis</title>
<p>The populations were clustered to assess the pattern of variation among the sampled populations. We used the whole-genome SNPs to construct the maximum likelihood (ML) phylogenetic tree with 100 bootstraps using SNPhylo v20140701 (<xref ref-type="bibr" rid="ref19">Lee et al., 2014</xref>). <italic>Camellia cuspidata</italic> was used as an outgroup. Color coding of the phylogenetic tree was done through the iTOL web server.<xref rid="fn0007" ref-type="fn"><sup>3</sup></xref></p>
</sec>
<sec id="sec7">
<title>LD, population structure, and PCA</title>
<p>The SNPs in LD were filtered out using PLINK v1.90b3.38 (<xref ref-type="bibr" rid="ref29">Purcell et al., 2007</xref>) with a window of size 50 SNPs (advancing 5 SNPs at a time) and an <italic>r</italic><sup>2</sup> threshold of 0.5 to determine a pruned SNP set to be used in the population structure analysis. LD-based pruning reduces the effects of ascertainment bias in a relatively efficient manner (<xref ref-type="bibr" rid="ref23">Malomane et al., 2018</xref>). Principal component analysis (PCA) was performed with the Genome-wide Complex Trait Analysis (GCTA) v1.25.3 (<xref ref-type="bibr" rid="ref40">Yang et al., 2011</xref>), and the first three eigenvectors were plotted. LD was calculated using PopLDdecay v3.41 (<xref ref-type="bibr" rid="ref41">Zhang et al., 2019</xref>). The pairwise <italic>r</italic><sup>2</sup> values within and between different chromosomes were calculated. The LD for each group was calculated using SNP pairs only from the corresponding group.</p>
<p>The population structure was analyzed using the ADMIXTURE v1.3 (<xref ref-type="bibr" rid="ref2">Alexander et al., 2009</xref>) program with a block-relaxation algorithm. To explore the convergence of individuals, we predefined the number of genetic clusters K, from 2 to 9 and ran the cross-validation (CV) error procedure. Default methods and settings were used in the analyses.</p>
</sec>
<sec id="sec8">
<title>Genetic diversity analysis and population differentiation</title>
<p>The primary genetic diversity parameters like observed heterozygosity (<italic>H<sub>O</sub></italic>), expected heterozygosity (<italic>H<sub>E</sub></italic>), inbreeding coefficient (<italic>F</italic>), the average pairwise diversity within a population (<italic>&#x03B8;</italic>&#x03C0;), and Tajima&#x2019;s <italic>D</italic> were calculated using VCFtools v0.1.13 (<xref ref-type="bibr" rid="ref6">Danecek et al., 2011</xref>) with 100&#x2009;kb sliding windows. In addition, overall genetic differentiation across populations measured by Weir and Cockerham&#x2019;s estimator of <italic>F<sub>ST</sub></italic> (<xref ref-type="bibr" rid="ref36">Weir and Hill, 2002</xref>) was also calculated using the same software.</p>
</sec>
<sec id="sec9">
<title>Isolation by distance and environment</title>
<p>To assess the potential correlation of environmental and geographic variation with the tea genetic structure, the use of variables was necessary to adequately capture the general environmental and geographical differences among the Lincang regions sampled. For this purpose, we calculated the pairwise genetic distance matrix using the PLINK (v1.90b3.38). Meanwhile, we downloaded four environmental variables and two geographic variables available at each ecotype site (resolution 30 arc seconds) through WorldClim<xref rid="fn0008" ref-type="fn"><sup>4</sup></xref> (<xref ref-type="bibr" rid="ref14">Hijmans et al., 2005</xref>).</p>
<p>Among the four environmental variables, we selected two temperature and two precipitation variables. The temperature variables (in &#x00B0;C&#x002A;10, expressed to the nearest tenths) included the mean diurnal range (MDR) and the mean temperature of the wettest quarter (MTW). The precipitation variables (expressed to the nearest mm) included annual precipitation (AP) and precipitation of the wettest month (PWM; <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 10</xref>). In addition, we derived 10 additional variables from the squares (4) and cross-products (6) of the original four environment variables to explore whether non-linear environmental effects may also affect the genetic structure.</p>
<p>The geographic variables included LONG and LAT. These variables tended to show lower correlations with each other. We intended to reduce the redundancy inherent among highly associated variables (<xref ref-type="bibr" rid="ref12">Hall and Beissinger, 2014</xref>). For the geographical distance matrix, we calculated the great-circle distance (the closest distance between two points on the Earth&#x2019;s surface) in miles from untransformed LONG and LAT values for each pair of accessions using the &#x201C;geosphere&#x201D; package in R.<xref rid="fn0009" ref-type="fn"><sup>5</sup></xref> For the environment distance matrix, we calculated an environmental distance matrix from Euclidean distances between pairs of accessions using all 14 environmental variables.</p>
<p>To sort out the potential effects of environmental variables and geographical isolation on genetic variation in tea tree samples, the Mantel test (<xref ref-type="bibr" rid="ref9">Diniz-Filho et al., 2013</xref>) of correlation among genetic, environmental and geographical distance matrices was applied by using the VEGAN<xref rid="fn0010" ref-type="fn"><sup>6</sup></xref> package in R, in which significance testing of the correlations was performed with 10,000 permutations. Then, we explored whether there was a significant association between environmental distance and genetic distance by using a partial Mantel test, adjusted for any effects of geographical distances. Significance in this test was interpreted as meaning that genetic variation among the tea was influenced by environmental selection, whereas a nonsignificant result suggested a role for isolation by distance (genetic drift).</p>
</sec>
<sec id="sec10">
<title>Differentiation and historical relationships between populations</title>
<p>The historical relationship between Lincang tea geographical populations was estimated using TreeMix (<xref ref-type="bibr" rid="ref28">Pickrell and Pritchard, 2012</xref>), which uses a Maximum Likelihood (ML) method based on a Gaussian model of allele frequency change. The topology of the ML trees changes depending on the number of migration events (<italic>m</italic>) allowed in the model. Here, we used <italic>m</italic>&#x2009;=&#x2009;1 to <italic>m</italic>&#x2009;=&#x2009;5 (<xref ref-type="bibr" rid="ref11">Fitak, 2021</xref>). The bootstrap values on the tree are based on 1,000 replicates. Arrows on the graph represent admixture events between different tea populations. The Others population was used to root the tree.</p>
</sec>
<sec id="sec11">
<title>Demographic history reconstruction</title>
<p>To uncover the evolutionary history of Lincang tea subpopulations, we used MSMC2<xref rid="fn0011" ref-type="fn"><sup>7</sup></xref> to infer the population size of each group. The input files for MSMC2 were generated according to MSMC Tools.<xref rid="fn0012" ref-type="fn"><sup>8</sup></xref> In brief, only sites with uniquely mapped reads and sites with coverage depths between 0.5-fold and 2-fold of mean depth were used in the analyses. The remaining genomic regions were masked using the script bamCaller.py. Then all segregating sites within each group were phased using SHAPEIT (Version: v2.r904; <xref ref-type="bibr" rid="ref7">Delaneau et al., 2011</xref>). A mutation rate of 6.1&#x2009;&#x00D7;&#x2009;10<sup>&#x2212;9</sup> per site per year was used.</p>
</sec>
<sec id="sec12">
<title>Genome scanning for selective sweep signals</title>
<p>RAiSD (Raised Accuracy in Sweep Detection, Version 2.9; <xref ref-type="bibr" rid="ref1">Alachiotis and Pavlidis, 2018</xref>) was used to detect signatures of selective sweeps based on the &#x03BC; statistic. The significance threshold for the &#x03BC; statistic score was set as the top 0.1%. Then, we performed a genetic differentiation (<italic>F<sub>ST</sub></italic>) and nucleotide polymorphism (<italic>&#x03B8;</italic>&#x03C0;) based cross approach to investigate the selection signals across the whole genome. A 50&#x2009;kb sliding window with 10&#x2009;kb step approach was applied to quantify <italic>F<sub>ST</sub></italic> and <italic>&#x03B8;</italic>&#x03C0; by using VCFtools software (v0.1.13). The annotated genes located in these regions were considered candidate selected genes.</p>
</sec>
</sec>
<sec id="sec13" sec-type="results">
<title>Results</title>
<sec id="sec14">
<title>Whole-genome resequencing and variant calling</title>
<p>Resequencing of 1,350 tea samples yielded around 9.67&#x2009;Tb data (64,462,516,344 paired-end raw reads). Mapping of these reads with the reference genome resulted in an average alignment rate of 98.9&#x2009;&#x00B1;&#x2009;2.65% (63.79&#x2013;99.54%, <xref ref-type="supplementary-material" rid="SM2">Supplementary Figure 1</xref> and <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>). The information about 9 populations with reads is presented in <xref rid="tab1" ref-type="table">Table 1</xref>. The average alignment rate of the population ranged from 96.75 to 98.75%; the average number of reads per sample of the population ranged from 45,974,894 to 56,685,556;</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Sequencing and genetic variation information of tea from different geographic regions.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Population name</th>
<th align="center" valign="top">Number of samples</th>
<th align="center" valign="top">Total number of reads</th>
<th align="center" valign="top">Average number of reads per sample</th>
<th align="center" valign="top">Average alignment rate</th>
<th align="center" valign="top">Average number of SNP per sample</th>
<th align="center" valign="top">SNP number before basic filtering criteria</th>
<th align="center" valign="top">Average number of indel per sample</th>
<th align="center" valign="top">Indel number before basic filtering criteria</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Cangyuan</td>
<td align="center" valign="top">31</td>
<td align="center" valign="top">1,757,252,234</td>
<td align="center" valign="top">56,685,555.94</td>
<td align="center" valign="top">98.46%</td>
<td align="center" valign="top">5,363,031.48</td>
<td align="center" valign="top">166,253,976</td>
<td align="center" valign="top">196,162.10</td>
<td align="center" valign="top">6,081,025</td>
</tr>
<tr>
<td align="left" valign="top">Zhenkang</td>
<td align="center" valign="top">81</td>
<td align="center" valign="top">3,723,966,416</td>
<td align="center" valign="top">45,974,894.02</td>
<td align="center" valign="top">98.67%</td>
<td align="center" valign="top">4,470,043.80</td>
<td align="center" valign="top">362,073,548</td>
<td align="center" valign="top">153,944.21</td>
<td align="center" valign="top">12,469,481</td>
</tr>
<tr>
<td align="left" valign="top">Linxiang</td>
<td align="center" valign="top">235</td>
<td align="center" valign="top">11,292,071,182</td>
<td align="center" valign="top">48,051,366.73</td>
<td align="center" valign="top">98.38%</td>
<td align="center" valign="top">4,460,338.49</td>
<td align="center" valign="top">1,048,179,546</td>
<td align="center" valign="top">157,390.56</td>
<td align="center" valign="top">36,986,781</td>
</tr>
<tr>
<td align="left" valign="top">Yun</td>
<td align="center" valign="top">217</td>
<td align="center" valign="top">10,644,281,662</td>
<td align="center" valign="top">49,051,989.23</td>
<td align="center" valign="top">98.19%</td>
<td align="center" valign="top">4,424,562.90</td>
<td align="center" valign="top">960,130,150</td>
<td align="center" valign="top">150,987.73</td>
<td align="center" valign="top">32,764,338</td>
</tr>
<tr>
<td align="left" valign="top">Gengma</td>
<td align="center" valign="top">78</td>
<td align="center" valign="top">4,006,235,570</td>
<td align="center" valign="top">51,361,994.49</td>
<td align="center" valign="top">97.77%</td>
<td align="center" valign="top">4,972,003.38</td>
<td align="center" valign="top">387,816,264</td>
<td align="center" valign="top">182,226.69</td>
<td align="center" valign="top">14,213,682</td>
</tr>
<tr>
<td align="left" valign="top">Fengqing</td>
<td align="center" valign="top">115</td>
<td align="center" valign="top">5,566,575,542</td>
<td align="center" valign="top">48,405,004.71</td>
<td align="center" valign="top">98.21%</td>
<td align="center" valign="top">4,424,233.28</td>
<td align="center" valign="top">508,786,827</td>
<td align="center" valign="top">151,856.77</td>
<td align="center" valign="top">17,463,528</td>
</tr>
<tr>
<td align="left" valign="top">Yongde</td>
<td align="center" valign="top">187</td>
<td align="center" valign="top">8,770,015,522</td>
<td align="center" valign="top">46,898,478.73</td>
<td align="center" valign="top">96.75%</td>
<td align="center" valign="top">4,386,209.56</td>
<td align="center" valign="top">820,221,187</td>
<td align="center" valign="top">156,149.85</td>
<td align="center" valign="top">29,200,022</td>
</tr>
<tr>
<td align="left" valign="top">Shuangjiang</td>
<td align="center" valign="top">291</td>
<td align="center" valign="top">13,270,042,504</td>
<td align="center" valign="top">45,601,520.63</td>
<td align="center" valign="top">98.75%</td>
<td align="center" valign="top">4,499,634.64</td>
<td align="center" valign="top">1,309,393,681</td>
<td align="center" valign="top">158,839.54</td>
<td align="center" valign="top">46,222,305</td>
</tr>
<tr>
<td align="left" valign="top">Other</td>
<td align="center" valign="top">116</td>
<td align="center" valign="top">5,432,075,712</td>
<td align="center" valign="top">45,229,542.50</td>
<td align="center" valign="top">97.55%</td>
<td align="center" valign="top">3,488,540.00</td>
<td align="center" valign="top">404,670,652</td>
<td align="center" valign="top">111,968.00</td>
<td align="center" valign="top">12,988,316</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>After applying basic filtering criteria (see &#x201C;<xref rid="sec2" ref-type="sec">Materials and Methods</xref>&#x201D; section), we identified 356,171,898 SNPs and 27,367,688 short genomic insertions and deletions (indels). The result of filtering is presented in <xref ref-type="supplementary-material" rid="SM2">Supplementary Figures 2, 3</xref> and <xref ref-type="supplementary-material" rid="SM1">Supplementary Tables 2, 3</xref>. Further filtering yielded a core set of 27,550,879 SNPs and 1,139,750 indels (&#x2264;40&#x2009;bp) with minor allele frequency (MAF) more than 0.05 and max missing less than 0.5. The information about SNPs and indels is presented in <xref rid="tab1" ref-type="table">Table 1</xref> and <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 4</xref>. Meanwhile, in the core data set, 91.36% (25,170,861) of the SNPs were located in the intergenic region, 4.82% (1,327,890) in the intronic region, 0.131% (36,049) in the 5&#x2032;-UTR, and 0.295% (81,272) in the 3&#x2032;-UTR. We observed 0.91% (251,002) and 1.01% (277,265) of the SNPs in the upstream and downstream regions of the genes, respectively. Further, 0.0151% (4,150) were located in the variable splicing region and 1.44% (395,863) were present in the exonic regions of the genes (<xref rid="tab2" ref-type="table">Table 2</xref> and <xref ref-type="supplementary-material" rid="SM2">Supplementary Figure 4</xref>). Among the exonic SNPs, the proportion of non-synonymous SNPs was found to be 56.31% (223,048) and that of synonymous SNPs was found to be 41.60% (164,772), with the non-synonymous to synonymous mutation ratio of 1.354. The total number of stop-gain SNP mutations was 7,652, whereas the total number of stop-loss SNP mutations was 557 (0.00202%).
In addition, 950,567 indels were found to be located in the intergenic region (83.40%), followed by the intronic region (Intronic) with 118,677 (10.41%; <xref rid="tab2" ref-type="table">Tables 2</xref>,<xref rid="tab3" ref-type="table">3</xref> and <xref ref-type="supplementary-material" rid="SM2">Supplementary Figure 4</xref>).</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>The number of SNPs and indels in different genome structures.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Variants</th>
<th align="left" valign="top">Type</th>
<th align="center" valign="top">Core set</th>
</tr>
</thead>
<tbody>
<tr>
<td align="char" valign="top" char="." rowspan="12">SNP</td>
<td align="char" valign="top" char="&#x00B1;">Total</td>
<td align="char" valign="top" char="&#x00B1;">27,550,879</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Intergenic</td>
<td align="char" valign="top" char="&#x00B1;">25,170,861</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Intronic</td>
<td align="char" valign="top" char="&#x00B1;">1,327,890</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Exonic</td>
<td align="char" valign="top" char="&#x00B1;">395,863</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">5&#x2032;-UTR</td>
<td align="char" valign="top" char="&#x00B1;">36,049</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">3&#x2032;-UTR</td>
<td align="char" valign="top" char="&#x00B1;">81,272</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">UTR5; UTR3</td>
<td align="char" valign="top" char="&#x00B1;">237</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Upstream</td>
<td align="char" valign="top" char="&#x00B1;">251,002</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Downstream</td>
<td align="char" valign="top" char="&#x00B1;">277,265</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Upstream; downstream</td>
<td align="char" valign="top" char="&#x00B1;">6,095</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Splicing</td>
<td align="char" valign="top" char="&#x00B1;">4,150</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Exonic; splicing</td>
<td align="char" valign="top" char="&#x00B1;">195</td>
</tr>
<tr>
<td align="char" valign="top" char="." rowspan="11">Indel</td>
<td align="char" valign="top" char="&#x00B1;">Total</td>
<td align="char" valign="top" char="&#x00B1;">1,139,750</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Intergenic</td>
<td align="char" valign="top" char="&#x00B1;">950,567</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Intronic</td>
<td align="char" valign="top" char="&#x00B1;">118,677</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Exonic</td>
<td align="char" valign="top" char="&#x00B1;">14,064</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">5&#x2032;-UTR</td>
<td align="char" valign="top" char="&#x00B1;">3,715</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">3&#x2032;-UTR</td>
<td align="char" valign="top" char="&#x00B1;">8,271</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">UTR5; UTR3</td>
<td align="char" valign="top" char="&#x00B1;">9</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Upstream</td>
<td align="char" valign="top" char="&#x00B1;">19,862</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Downstream</td>
<td align="char" valign="top" char="&#x00B1;">23,608</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Upstream; downstream</td>
<td align="char" valign="top" char="&#x00B1;">569</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Splicing</td>
<td align="char" valign="top" char="&#x00B1;">399</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>The number of large-effect SNPs and indels.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Variants</th>
<th align="left" valign="top">Type</th>
<th align="center" valign="top">Core set</th>
</tr>
</thead>
<tbody>
<tr>
<td align="char" valign="top" char="." rowspan="7">SNP</td>
<td align="char" valign="top" char="&#x00B1;">Total (exonic+exonic splicing)</td>
<td align="char" valign="top" char="&#x00B1;">396,058</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Nonsynonymous</td>
<td align="char" valign="top" char="&#x00B1;">223,048</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Synonymous</td>
<td align="char" valign="top" char="&#x00B1;">164,772</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Nonsyn/syn ratio</td>
<td align="char" valign="top" char="&#x00B1;">1.354</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Stop-gain</td>
<td align="char" valign="top" char="&#x00B1;">7,652</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Stop-loss</td>
<td align="char" valign="top" char="&#x00B1;">557</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Unknown</td>
<td align="char" valign="top" char="&#x00B1;">29</td>
</tr>
<tr>
<td align="char" valign="top" char="." rowspan="7">Indel</td>
<td align="char" valign="top" char="&#x00B1;">Total (exonic+exonic splicing)</td>
<td align="char" valign="top" char="&#x00B1;">14,073</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Frameshift deletion</td>
<td align="char" valign="top" char="&#x00B1;">6,189</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Frameshift insertion</td>
<td align="char" valign="top" char="&#x00B1;">3,744</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Non-frameshift deletion</td>
<td align="char" valign="top" char="&#x00B1;">2,573</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Non-frameshift insertion</td>
<td align="char" valign="top" char="&#x00B1;">1,300</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Stop-gain</td>
<td align="char" valign="top" char="&#x00B1;">245</td>
</tr>
<tr>
<td align="char" valign="top" char="&#x00B1;">Stop-loss</td>
<td align="char" valign="top" char="&#x00B1;">22</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec15">
<title>Population structure and principal component analysis (PCA)</title>
<p>The SNPs after filtering linkage disequilibrium sites (MAF&#x2009;&#x003E;&#x2009;0.05) were used to analyze the population structure and differentiation. According to the calculated CV error value, when <italic>K</italic>&#x2009;=&#x2009;3, the CV error value is the smallest (<xref rid="fig2" ref-type="fig">Figure 2A</xref>). Therefore, there are three genetic stocks (genetically different populations) represented as red, green, and blue color (<xref rid="fig2" ref-type="fig">Figure 2B</xref>) in Lincang. Interestingly, Shuangjiang and Zhenkang populations were found to be almost genetically pure, whereas the other populations showed a substantial level of genetic admixtures (<xref rid="fig2" ref-type="fig">Figure 2B</xref>). The red genetic stock appears to be the most dominant followed by green, whereas the third genetic stock, blue, represents a very small proportion only in the Yun, Zhenkang, and Fengqing tea populations.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p><bold>(A)</bold> Cross-validation errors. The x-axis represents the <italic>K</italic> value, while y-axis indicates the cross-validation errors. The dot shows <italic>K</italic>&#x2009;=&#x2009;3 with the lowest cross-validation errors. <bold>(B)</bold> Population structure of the tea plant collections, which represents the best inferred K-value with the lowest cross-validation errors. <bold>(C,D)</bold> PCA analysis. From left to right, the three squares indicate group I, group II, and group III.</p>
</caption>
<graphic xlink:href="fpls-13-984422-g002.tif"/>
</fig>
<p>In the principal component analysis of tea accessions from Lincang (<xref rid="fig2" ref-type="fig">Figure 2</xref>), the three principal components of PC1, PC2, and PC3 represented 5.99, 2.28, and 0.61% of the total genetic variance, respectively. The accessions were grouped into three subgroups based on the STRUCTURE-inferred clustering result, with 1,105, 172, and 74 accessions assigned to sub-populations 1, 2, and 3, respectively (an individual with the highest proportion of red ancestry was classified as group 1; accessions with the highest proportion of yellow ancestry were assigned to group 2; all others were assigned to group 3; <xref rid="fig2" ref-type="fig">Figure 2B</xref>). Among them, the first PC (PC1) clustered the varieties of group 2 apart from the others, and the combination of PC1 (5.99%) and PC2 (2.28%) can distinguish group 2 (red, <xref rid="fig2" ref-type="fig">Figure 2C</xref>) from the other accessions.</p>
</sec>
<sec id="sec16">
<title>Phylogenetic analysis</title>
<p>Based on the core SNP set, the phylogenetic tree was constructed using the maximum likelihood (ML) method with KM6 (<italic>C. cuspidata</italic>) as the outgroup, and a bootstrap value of 100. The result of regional phylogenetic analysis showed that no precise geographic or regional clustering was observed in the phylogenetic tree (<xref ref-type="supplementary-material" rid="SM2">Supplementary Figure 5</xref>). However, when tea tree samples were divided into three subgroups based on the results of genetic stratification analysis (<xref rid="fig3" ref-type="fig">Figure 3A</xref>), the phylogenetic tree showed four distinct clusters, which recapitulates the same patterns in the principal component analysis (PCA) and model-based clustering (<xref rid="fig3" ref-type="fig">Figure 3B</xref>). Among them, group 1 was divided into two clusters, and groups 2 and 3 each formed a single cluster. In group 1 (<xref rid="fig3" ref-type="fig">Figure 3A</xref>; blue), the samples collected in Cluster 1 were dominated by Yun and Linxiang, which had a close genetic relationship with the outgroup. It can be inferred that the border between Yun and Linxiang may be the origin of tea. The samples in Cluster 2 are dominated by Shuangjiang and Linxiang, which are genetically more distant from the outgroup, and it can be inferred that the area covered by Cluster 2 may have been introduced later and thus developed into numerous new branches. In group 2 (<xref rid="fig3" ref-type="fig">Figures 3A</xref>,<xref rid="fig3" ref-type="fig">B</xref>), tea samples from Yun County were dominant, and the tea samples collected in Yun (leaf nodes) were genetically closer to the proximal ancestors (the inner node). It can be further inferred that the region of Yun is the area where the initial origin has produced differentiation afterward.
In group 3 (<xref rid="fig3" ref-type="fig">Figure 3A</xref>: green and <xref rid="fig3" ref-type="fig">Figure 3B</xref>), tea samples of Fengqing were dominant and were more distantly related to the ancestor (inner node), indicating that group 3 represented by Fengqing was more divergent compared to group 1 and group 2. Notably, we found that 15 tea accessions, collected from Fengqing, Other, and Yun of group 2, were clustered with Cluster 2 of group 1; moreover, 4 tea accessions from Yun in subcluster 3 were clustered into Cluster 1 of group 1. These samples may reflect introgression in the regions where they cluster.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p><bold>(A)</bold> Phylogenetic relationships of 1,350 ancient tea plants in three subgroups. Deep blue, orange, and green represent group 1, group 2, and group 3. Bootstrap values are indicated by blue circles. KM6 (<italic>C. cuspidata</italic>) was selected as the outgroup. <bold>(B)</bold> The region distribution of accession from different tea plant subpopulations.</p>
</caption>
<graphic xlink:href="fpls-13-984422-g003.tif"/>
</fig>
</sec>
<sec id="sec17">
<title>Genetic diversity and population differentiation</title>
<p>The parameters <italic>&#x03B8;&#x03C0;</italic>, <italic>H<sub>O</sub></italic>, <italic>H<sub>E</sub></italic>, <italic>F</italic>, and Tajima&#x2019;s <italic>D</italic> were calculated for tea accessions to estimate the patterns of genetic diversity. The primary genetic diversity parameters are presented in <xref ref-type="supplementary-material" rid="SM1">Supplementary Tables 5&#x2013;9</xref>, <xref rid="tab4" ref-type="table">Table 4</xref> and <xref rid="fig4" ref-type="fig">Figure 4</xref>. For the subgroups, the expected heterozygosity (<italic>H<sub>E</sub></italic>) of the tea populations varied between 9.04 (group 2) and 10.25% (group 3), while the observed heterozygosity (<italic>H<sub>O</sub></italic>) of the tea populations ranged between 3.05% (group 2) and 3.43% (group 3). The inbreeding coefficient (<italic>F</italic>) of the tea populations varied between 65.52% (group 1) and 69.66% (group 3). It is worth noting that a degree of variability existed between subgroups in both <italic>H<sub>E</sub></italic> and <italic>F</italic> (<xref rid="fig4" ref-type="fig">Figure 4A</xref>). Nucleotide diversities (<italic>&#x03B8;&#x03C0;</italic>) in the three subgroups were estimated at the individual level after the correction for sample size. The analysis found that group 1 (8.89&#x2009;&#x00D7;&#x2009;10<sup>&#x2212;4</sup>) had the highest &#x03C0; value, while group 3 (3.31&#x2009;&#x00D7;&#x2009;10<sup>&#x2212;4</sup>) had the lowest &#x03C0; value (<xref rid="tab4" ref-type="table">Table 4</xref> and <xref rid="fig4" ref-type="fig">Figure 4B</xref>). Meanwhile, Tajima&#x2019;s <italic>D</italic> analysis showed positive Tajima&#x2019;s <italic>D</italic> test values for all subgroups in Lincang (group 3&#x2009;&#x003C;&#x2009;group 2&#x2009;&#x003C;&#x2009;group 1), in agreement with the findings of the &#x03C0; analysis, revealing that the Lincang tea populations may be experiencing population contraction, which may be related to directional selection.
For the regional populations (<xref ref-type="supplementary-material" rid="SM2">Supplementary Figure 6</xref>; <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 9</xref>), the Cangyuan population had the highest nucleotide diversity (1.777&#x2009;&#x00D7;&#x2009;10<sup>&#x2212;3</sup>), which is consistent with the expected heterozygosity and the observed heterozygosity. In addition, the Tajima&#x2019;s <italic>D</italic> of Cangyuan is closest to 0 relative to the other eight populations, indicating that Cangyuan is the least selected in Lincang and preserves a large amount of tea germplasm, which is a potential resource for expanding the genetic resources available for improvement. The lowest level of diversity was found for the Shuangjiang population (1.071&#x2009;&#x00D7;&#x2009;10<sup>&#x2212;3</sup>), which resulted from a long history of breed formation and selective breeding more than in most other areas.</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Genetic diversity of subgroups of Lincang tea.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Group</th>
<th align="char" valign="top" char="&#x00D7;"><italic>F</italic>(%)</th>
<th align="char" valign="top" char="&#x00D7;"><italic>H</italic><sub>E</sub></th>
<th align="char" valign="top" char="&#x00D7;"><italic>H</italic><sub>O</sub></th>
<th align="char" valign="top" char="&#x00D7;"><italic>&#x03B8;&#x03C0;</italic></th>
<th align="center" valign="top">Tajima&#x2019;s <italic>D</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="char" valign="top" char=".">Group 1</td>
<td align="char" valign="top" char="&#x00B1;">65.52%</td>
<td align="char" valign="top" char="&#x00B1;">9.87%</td>
<td align="char" valign="top" char="&#x00B1;">3.43%</td>
<td align="char" valign="top" char="&#x00B1;">8.89E-04</td>
<td align="char" valign="top" char="&#x00B1;">2.41</td>
</tr>
<tr>
<td align="char" valign="top" char=".">Group 2</td>
<td align="char" valign="top" char="&#x00B1;">67.21%</td>
<td align="char" valign="top" char="&#x00B1;">9.04%</td>
<td align="char" valign="top" char="&#x00B1;">3.05%</td>
<td align="char" valign="top" char="&#x00B1;">7.80E-04</td>
<td align="char" valign="top" char="&#x00B1;">1.41</td>
</tr>
<tr>
<td align="char" valign="top" char=".">Group 3</td>
<td align="char" valign="top" char="&#x00B1;">69.66%</td>
<td align="char" valign="top" char="&#x00B1;">10.25%</td>
<td align="char" valign="top" char="&#x00B1;">3.12%</td>
<td align="char" valign="top" char="&#x00B1;">3.13E-04</td>
<td align="char" valign="top" char="&#x00B1;">0.36</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p><bold>(A)</bold> Inbreeding coefficient, Proportion Observed Heterozygous Sites (%) and Proportion Expected Heterozygous Sites (%) estimation in tea plant subpopulations. Significance of differences between groups was derived with a one-sided t-test. Among them, 0.01 &#x003C; &#x002A;<italic>p</italic> &#x003C; 0.05; 0.001 &#x003C; &#x002A;&#x002A;<italic>p</italic> &#x003C; 0.01; &#x002A;&#x002A;&#x002A;<italic>p</italic> &#x003C; 0.001. <bold>(B)</bold> Nucleotide diversity (<italic>&#x03B8;&#x03C0;</italic>) and genetic differentiation (<italic>F<sub>ST</sub></italic>) within different tea plant subpopulations calculated using the sliding-window approach (100&#x2009;kb windows with 100&#x2009;kb steps). The circle size represents the mean value of <italic>&#x03B8;&#x03C0;</italic> in each subpopulation. The numbers marked between each subpopulation indicate the mean value of <italic>F<sub>ST</sub></italic>. <bold>(C)</bold> LD decay in different subpopulations. The x-coordinates indicate the distance between bases and the y-coordinates indicate the mean value of the correlation coefficient.</p>
</caption>
<graphic xlink:href="fpls-13-984422-g004.tif"/>
</fig>
<p>The pairwise <italic>F<sub>ST</sub></italic> between individual populations is presented in <xref rid="fig4" ref-type="fig">Figure 4B</xref>. Our analysis revealed that groups 1 and 2 (0.092) are genetically closer, while groups 1 and 3 (0.226) are more distant from each other. It is inferred that the ancestral population of subpopulation 3 may have diverged prior to the ancestral populations of subpopulations 1 and 2, resulting in greater genetic divergence. In addition, <italic>F<sub>ST</sub></italic> values between regions indicate a weak genetic differentiation between regional populations (<xref ref-type="supplementary-material" rid="SM2">Supplementary Figure 7</xref>).</p>
</sec>
<sec id="sec18">
<title>Isolation by distance and environment</title>
<p>In the Mantel tests, we can reject the null hypothesis that the distance matrices are unrelated, both for genetic distance versus environmental distance (<italic>r</italic><sub>1</sub>&#x2009;=&#x2009;0.1462) and for genetic distance versus geographic distance (<italic>r</italic><sub>2</sub>&#x2009;=&#x2009;0.07843), at alpha&#x2009;=&#x2009;0.01 (all <italic>p</italic>&#x2009;&#x2264;&#x2009;0.0001). The observed correlations (<italic>r</italic><sub>1</sub>&#x2009;=&#x2009;0.1462, <italic>r</italic><sub>2</sub>&#x2009;=&#x2009;0.07843) suggest that the matrix entries are positively associated. This means that larger geographical distances between tea accessions are associated with greater genetic differences, and larger differences in environmental variables are associated with greater genetic distance.</p>
<p>In the partial Mantel test, the genetic and environmental distances were still moderately correlated (<italic>r</italic><sub>3</sub>&#x2009;=&#x2009;0.09541, <italic>p</italic>&#x2009;=&#x2009;0.001) after controlling for geographic distance. This result suggests that environmental selection has a weak but non-negligible effect in shaping genetic variation in wild tea germplasm after controlling for isolation by distance. It can be inferred that there may have been mutual introductions of tea accessions among regions, resulting in the weak regional characteristics of tea trees in these populations, which corresponds to the results of regional phylogenetic analysis (<xref ref-type="supplementary-material" rid="SM2">Supplementary Figure 5</xref>).</p>
</sec>
<sec id="sec19">
<title>Linkage disequilibrium analysis</title>
<p>Linkage disequilibrium (LD) is a non-random association of alleles at different positions in a given population, and is usually expressed as <italic>D</italic> and <italic>r</italic><sup>2</sup> values (<xref ref-type="bibr" rid="ref32">Slatkin, 2008</xref>), which are mainly related to whether the same species have experienced domestication pressure, regional selection pressure, and nucleotide diversity. The LD analysis showed that the <italic>r</italic><sup>2</sup> values of accessions of all subgroups are &#x003C;0.4 (<xref rid="fig4" ref-type="fig">Figure 4C</xref>). When the <italic>r</italic><sup>2</sup> value is &#x003C;0.4, it is generally considered that there is no effective linkage or no linkage. At the same time, the linkage disequilibrium decay distance indicated that the degree of linkage in Lincang is quite low, which is consistent with the information that the tea samples are basically ancient tea trees with a short history of artificial cultivation.</p>
</sec>
<sec id="sec20">
<title>Gene flow and historical effective population size</title>
<p>Population history includes events such as population bottlenecks, expansions, migrations and admixtures, which have important implications for the formation of genetic polymorphic patterns in populations. To reveal the evolutionary history of Lincang tea, we applied the multiple sequentially Markovian coalescent (MSMC; <xref ref-type="bibr" rid="ref31">Schiffels and Durbin, 2014</xref>) model to the analysis of phased SNP data from three subgroups. As displayed in <xref rid="fig5" ref-type="fig">Figure 5G</xref>, similar evolutionary patterns were observed for the three subgroups of Lincang. Group 1 (green line) and group 3 (red line) manifested a slight Ne expansion around 10&#x2013;1,000&#x2009;Kya and 2&#x2013;100&#x2009;Kya, and a subsequent Ne contraction (Ne&#x2009;&#x2248;&#x2009;80,000 down to Ne&#x2009;&#x2248;&#x2009;1,800 and Ne&#x2009;&#x2248;&#x2009;100,000 down to Ne&#x2009;&#x2248;&#x2009;2,000) around 0.5&#x2013;10&#x2009;Kya and 0.4&#x2013;3&#x2009;Kya, respectively. Interestingly, the range of expansion and contraction in group 2 is significantly greater compared to groups 1 and 2, inferring that group 3 is likely to be more heavily influenced by human and environmental influences.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p><bold>(A&#x2013;F)</bold> Population splits and migrations between tea plant accessions. Hybridization likely occurs among the different areas in Lincang. <bold>(G)</bold> MSMC-derived demographic history of different tea plant subpopulations from 10<sup>2</sup> to 10<sup>6</sup> years ago.</p>
</caption>
<graphic xlink:href="fpls-13-984422-g005.tif"/>
</fig>
<p>Also, TreeMix was used for inferring historical segregation and admixture of populations based on genome-wide SNP allele frequency data. Regarding the changes in tea gene flow, <xref rid="fig5" ref-type="fig">Figures 5E</xref>,<xref rid="fig5" ref-type="fig">F</xref> both show a TreeMix migration vector connecting Fengqing and Zhenkang, and the proportion of gene exchange is relatively large (38.43 and 36.57%, respectively), indicating bidirectional gene flow. This indicates that hybridization likely occurs among the different geographic areas in nature.</p>
</sec>
<sec id="sec21">
<title>Selection signals of Lincang tea subgroups</title>
<p>To study the selective characterization of the three subgroups of Lincang, 611, 1,328, and 2,090 selective sweep regions were detected in the three tea plant subpopulations, which harbored 65, 145, and 201 candidate genes, respectively (<xref ref-type="supplementary-material" rid="SM1">Supplementary Tables 10&#x2013;13</xref>, <xref rid="fig6" ref-type="fig">Figure 6</xref>, and <xref ref-type="supplementary-material" rid="SM2">Supplementary Figures 9&#x2013;11</xref>). In group 1, the highest signal (88.62) was found on chromosome 7 at position 26,812,875&#x2009;bp, and the most selective signals were detected on chromosome 7 (278 signals). In group 2, 344 selective signals were identified on chromosome 6, and the highest signal reached 187.6 on chromosome 12 at 96,426,312&#x2009;bp. For group 3, chromosome 15 (354 selective regions) has the most selective regions, and chromosome 12 at 96,038,055&#x2009;bp has the strongest selective signal (247.9). GO enrichment of the 65, 145, and 201 domesticated candidate genes of the three subgroups showed significant functional representation in the GO categories of negative regulation of cellular process in group 1; peptidase S8/S53 domains, oxidoreductase activity, Golgi apparatus, obsolete oxidation&#x2013;reduction process, cellular response to lipid in group 2; and UDP-glucosyltransferase activity and glucosyltransferase activity in group 3 (<xref ref-type="supplementary-material" rid="SM2">Supplementary Figure 12</xref>). Interestingly, the peptidase S8/S53 domains enriched in subpopulation 2 are functionally consistent with those enriched in previous studies of small-leaf tea (<xref ref-type="bibr" rid="ref33">Wang et al., 2020</xref>), and it is inferred that subpopulation 2 may contain domesticated and hybrid species of small-leaf tea. As a limitation, the lack of functional clarity of many high-signal positional candidate genes makes it impossible to identify selective differential traits between different subpopulations.</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p><bold>(A&#x2013;C)</bold> &#x03BC; statistics calculated by RAiSD across the genome in different tea plant subpopulations. The dashed lines mark the regions at the top 0.1%.</p>
</caption>
<graphic xlink:href="fpls-13-984422-g006.tif"/>
</fig>
</sec>
</sec>
<sec id="sec22" sec-type="discussions">
<title>Discussion</title>
<p>In this study, we reported the characteristics of whole-genome SNPs and indels of 1,350 tea accessions, covering almost all varieties of tea in Lincang, Yunnan. The genomic variation data on the scale of this study is the largest ever reported for the tea plant. In total, our study generated 64,462,516,344 short reads and 5.967 billion SNPs, and together these datasets provide the most extensive genomic resource available for tea researchers.</p>
<p>In the analysis of population structure, genetic stratification analysis by the Bayesian clustering model revealed the presence of three subpopulations. The study found that samples from group 1 were predominantly from Yun and Linxiang, and based on the geographical distribution of the sample, it can be inferred that the origin of group 1 is most likely from the Xigui and Nahan tea mountains located at the junction of Yun and Linxiang. It also has been identified that the wild tea plantation in the Xigui and Nahan tea mountains is one of China&#x2019;s tea tree origin centers (<xref ref-type="bibr" rid="ref47">Zhou and Zhu, 2007</xref>), which is consistent with the analysis of phylogenetic trees. Further, the border of Linxiang and Gengma, located in the northern part of Shuangjiang County, may be the main site of divergence for cluster 2 of the Lincang tea tree group 1 (<xref rid="fig3" ref-type="fig">Figure 3B</xref>). It is home to the north&#x2013;south branch of the Hengduan Mountain System-Bangma mountain, whose main peak is the Mengku snow mountain, with the highest and largest wild ancient tea tree community in the world at an altitude of 2,200&#x2013;2,750 m. The tea samples collected from Linxiang, Shuangjiang and Gengma probably originated in the wild ancient tea garden of Daxue mountain in Mengku. Meanwhile, it is proved that the Mengku tea species are mainly distributed in Linxiang, Cangyuan and Gengma by introductions centered on Shuangjiang County (<xref ref-type="bibr" rid="ref3">Chen, 1984</xref>), which further supports the area distribution of samples in group 1 (<xref rid="fig3" ref-type="fig">Figure 3B</xref>).</p>
<p>Likewise, Baiying Mountain&#x2019;s ancient tea plantations of Manwan Town, located in the north-east of Yun, are home to a mixture of Dali tea, Assam tea (<italic>C. sinensis</italic> var. <italic>assamica</italic>), and intermediate species of tea trees (<xref ref-type="bibr" rid="ref46">Zhao et al., 2014</xref>), are known as the world&#x2019;s tea gene pool, and are likely to contribute to the main origin of group 2 regional characteristics. Moreover, it also was found in the study that samples mainly from Fengqing and Yunxian, with a few from other Lincang areas, comprise group 3 (<xref rid="fig3" ref-type="fig">Figure 3B</xref>: blue), leading to a tentative inference that the origin of group 3 may be located in the Fengqing or Yun areas. Due to the influence of introgression among samples and the effectiveness of its genetic variation in this study, the possibility of other conditions cannot be ruled out. A more refined tea sample analysis can be carried out. Furthermore, details about development routes of different tea species subpopulations from Lincang tea region still need to be further clarified.</p>
<p>In addition, MSMC analysis identified a dramatic expansion and contraction of effective population size of the different tea subgroups of Lincang. It is noted that the Pu people began domesticating and using wild tea trees in Yunnan 3,000&#x2009;years ago during the Shang and Zhou periods, which may account for the lower effective population. Regarding the genetic variability of the Lincang tea, significant genetic differences were found among the three subgroups, and a moderate level of genetic differentiation was found among the eight regional populations. Meanwhile, genetic differences were not significantly associated with environmental (<italic>r</italic><sub>1</sub>&#x2009;=&#x2009;0.1462) or geographic (<italic>r</italic><sub>2</sub>&#x2009;=&#x2009;0.07843) distance, indicating that introgressive hybridization and artificial selection may have occurred between regions, in agreement with the results of gene flow analysis and local tea plantation policies. In addition, the apparent introduction of Lincang tea trees and close geographical distances between sampling regions have led to no clear classification between geographical regions, making the classification of Lincang tea somewhat challenging. In the coming years, further worldwide sampling and analysis will help resolve the current debates on tea taxonomy.</p>
<p>Moreover, a low level of diversity was observed in Lincang, contrary to previous research on tea trees in Lincang (<xref ref-type="bibr" rid="ref24">Mao, 2018</xref>). One possible explanation of the result is that sampling led to differences in genetic diversity. Previous studies have focused on wild teas, whereas this study was based on both wild and cultivated teas, with a much wider distribution of sampling sites, which makes Lincang&#x2019;s genetic diversity more convincing. In addition, compared with other species, the nucleotide diversity of these populations is also lower than that of common wild rice (3&#x2009;&#x00D7;&#x2009;10<sup>&#x2212;3</sup>; <xref ref-type="bibr" rid="ref15">Huang et al., 2012</xref>), wild soybean (2.94&#x2009;&#x00D7;&#x2009;10<sup>&#x2212;3</sup>; <xref ref-type="bibr" rid="ref48">Zhou et al., 2015</xref>), wild grape (3.5&#x2009;&#x00D7;&#x2009;10<sup>&#x2212;3</sup>; <xref ref-type="bibr" rid="ref22">Liang et al., 2019</xref>), etc. This may be related to the relatively small effective population of the group in the Lincang tea region. Thus, the current situation of genetic diversity in Lincang should be a cause for widespread concern and protection for the local people.</p>
<p>In conclusion, our population genomic investigations of Lincang tea provide novel information about their ancestry, gene flow, history of effective population size, and genetic diversity. The Lincang tea has three distinct possible origins: Xigui and Nahan Tea Mountain in Linxiang, Baiying Mountain Ancient Tea Garden in Yun, and Jinxiu Village of Xiaowan Town in Fengqing. The finding of the origin location in the Lincang region offers a theoretical benchmark for the investigation of the genesis and development of tea plants on a more global scale. Furthermore, the studies into numerous facets of tea plant biology will be made easier thanks to this extensive SNP database of tea species.</p>
</sec>
<sec id="sec23" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw sequencing data reported in this paper have been deposited in the BIG Data Center (<ext-link xlink:href="http://bigd.big.ac.cn/gsa" ext-link-type="uri">http://bigd.big.ac.cn/gsa</ext-link>) under the accession number PRJCA011312. In addition, the sequencing data are also accessible from the tea database (<ext-link xlink:href="http://teabase.ynau.edu.cn/index/download/index" ext-link-type="uri">http://teabase.ynau.edu.cn/index/download/index</ext-link>).</p>
</sec>
<sec id="sec24">
<title>Author contributions</title>
<p>YD designed the study. ZW, SN, GX, LY, CW, DL, ShiZ, SY, and ShuZ collected the tea samples. SD, YL, and LY performed the genome data analyses. YL and LY wrote the manuscript. YD, LK, SD, and JS revised and improved the manuscript. All authors reviewed and approved the final version of the manuscript.</p>
</sec>
<sec id="sec25" sec-type="funding-information">
<title>Funding</title>
<p>This work was supported by Digitalization of Biological Resource Project (grant number 202002AA100007), Yunnan; Yunnan provincial key programs of Yunnan Eco-friendly Food International Cooperation Research Center project (grant number 2019ZG00908); and Yunnan Provincial Science and Technology Department Project (Development and Application of Biological Resource Digitalization; grant number 2019008).</p>
</sec>
<sec id="conf1" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="sec100" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<ack>
<p>We thank all the individuals who have helped us in this study.</p>
</ack>
<sec id="sec27" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2022.984422/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fpls.2022.984422/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table_1.XLSX" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Data_Sheet_1.docx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alachiotis</surname> <given-names>N.</given-names></name> <name><surname>Pavlidis</surname> <given-names>P.</given-names></name></person-group> (<year>2018</year>). <article-title>RAiSD detects positive selection based on multiple signatures of a selective sweep and SNP vectors</article-title>. <source>Commun. Biol.</source> <volume>1</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s42003-018-0085-8</pub-id></citation></ref>
<ref id="ref2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alexander</surname> <given-names>D. H.</given-names></name> <name><surname>Novembre</surname> <given-names>J.</given-names></name> <name><surname>Lange</surname> <given-names>K.</given-names></name></person-group> (<year>2009</year>). <article-title>Fast model-based estimation of ancestry in unrelated individuals</article-title>. <source>Genome Res.</source> <volume>19</volume>, <fpage>1655</fpage>&#x2013;<lpage>1664</lpage>. doi: <pub-id pub-id-type="doi">10.1101/gr.094052.109</pub-id>, PMID: <pub-id pub-id-type="pmid">19648217</pub-id></citation></ref>
<ref id="ref3"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>C.</given-names></name></person-group> (<year>1984</year>). <source>A general history of tea industry</source>. Agricultural Press.</citation></ref>
<ref id="ref4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>P.</given-names></name> <name><surname>Xia</surname> <given-names>Y.</given-names></name> <name><surname>Xu</surname> <given-names>M.</given-names></name> <name><surname>Pei</surname> <given-names>S.</given-names></name></person-group> (<year>2005</year>). <article-title>Genetic diversity and differentiation of Camellia sinensis L. (cultivated tea) and its wild relatives in Yunnan province of China, revealed by morphology, biochemistry and allozyme studies</article-title>. <source>Genet. Resour. Crop Evol.</source> <volume>52</volume>, <fpage>41</fpage>&#x2013;<lpage>52</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10722-005-0285-1</pub-id></citation></ref>
<ref id="ref5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>S.</given-names></name> <name><surname>Zhou</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Gu</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Fastp: an ultra-fast all-in-one FASTQ preprocessor</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>i884</fpage>&#x2013;<lpage>i890</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/bty560</pub-id>, PMID: <pub-id pub-id-type="pmid">30423086</pub-id></citation></ref>
<ref id="ref6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Danecek</surname> <given-names>P.</given-names></name> <name><surname>Auton</surname> <given-names>A.</given-names></name> <name><surname>Abecasis</surname> <given-names>G.</given-names></name> <name><surname>Albers</surname> <given-names>C. A.</given-names></name> <name><surname>Banks</surname> <given-names>E.</given-names></name> <name><surname>DePristo</surname> <given-names>M. A.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>The variant call format and VCFtools</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>2156</fpage>&#x2013;<lpage>2158</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btr330</pub-id>, PMID: <pub-id pub-id-type="pmid">21653522</pub-id></citation></ref>
<ref id="ref7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Delaneau</surname> <given-names>O.</given-names></name> <name><surname>Marchini</surname> <given-names>J.</given-names></name> <name><surname>Zagury</surname> <given-names>J. F.</given-names></name></person-group> (<year>2011</year>). <article-title>A linear complexity phasing method for thousands of genomes</article-title>. <source>Nat. Methods</source> <volume>9</volume>, <fpage>179</fpage>&#x2013;<lpage>181</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nmeth.1785</pub-id>, PMID: <pub-id pub-id-type="pmid">22138821</pub-id></citation></ref>
<ref id="ref8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>DePristo</surname> <given-names>M. A.</given-names></name> <name><surname>Banks</surname> <given-names>E.</given-names></name> <name><surname>Poplin</surname> <given-names>R.</given-names></name> <name><surname>Garimella</surname> <given-names>K. V.</given-names></name> <name><surname>Maguire</surname> <given-names>J. R.</given-names></name> <name><surname>Hartl</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>A framework for variation discovery and genotyping using next-generation DNA sequencing data</article-title>. <source>Nat. Genet.</source> <volume>43</volume>, <fpage>491</fpage>&#x2013;<lpage>498</lpage>. doi: <pub-id pub-id-type="doi">10.1038/ng.806</pub-id>, PMID: <pub-id pub-id-type="pmid">21478889</pub-id></citation></ref>
<ref id="ref9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Diniz-Filho</surname> <given-names>J. A. F.</given-names></name> <name><surname>Soares</surname> <given-names>T. N.</given-names></name> <name><surname>Lima</surname> <given-names>J. S.</given-names></name> <name><surname>Dobrovolski</surname> <given-names>R.</given-names></name> <name><surname>Landeiro</surname> <given-names>V. L.</given-names></name> <name><surname>Telles</surname> <given-names>M. P. D. C.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Mantel test in population genetics</article-title>. <source>Genet. Mol. Biol.</source> <volume>36</volume>, <fpage>475</fpage>&#x2013;<lpage>485</lpage>. doi: <pub-id pub-id-type="doi">10.1590/S1415-47572013000400002</pub-id>, PMID: <pub-id pub-id-type="pmid">24385847</pub-id></citation></ref>
<ref id="ref10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Duan</surname> <given-names>N.</given-names></name> <name><surname>Bai</surname> <given-names>Y.</given-names></name> <name><surname>Sun</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>N.</given-names></name> <name><surname>Ma</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Genome re-sequencing reveals the history of apple and supports a two-stage model for fruit enlargement</article-title>. <source>Nat. Commun.</source> <volume>8</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-017-00336-7</pub-id></citation></ref>
<ref id="ref11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fitak</surname> <given-names>R. R.</given-names></name></person-group> (<year>2021</year>). <article-title>OptM: estimating the optimal number of migration edges on population trees using Treemix</article-title>. <source>Biol. Methods Protoc.</source> <volume>6</volume>:<fpage>bpab017</fpage>. doi: <pub-id pub-id-type="doi">10.1093/biomethods/bpab017</pub-id>, PMID: <pub-id pub-id-type="pmid">34595352</pub-id></citation></ref>
<ref id="ref12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hall</surname> <given-names>L. A.</given-names></name> <name><surname>Beissinger</surname> <given-names>S. R.</given-names></name></person-group> (<year>2014</year>). <article-title>A practical toolbox for design and analysis of landscape genetics studies</article-title>. <source>Landsc. Ecol.</source> <volume>29</volume>, <fpage>1487</fpage>&#x2013;<lpage>1504</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10980-014-0082-3</pub-id></citation></ref>
<ref id="ref13"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Hasimoto</surname> <given-names>M.</given-names></name></person-group> (<year>2001</year>). "The origin of the tea plant", in: <italic>Proceedings of 2001 International Conference on O&#x2013;Cha (Tea) Culture and Science (Session II)</italic>, 5&#x2013;8.</citation></ref>
<ref id="ref14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hijmans</surname> <given-names>R. J.</given-names></name> <name><surname>Cameron</surname> <given-names>S. E.</given-names></name> <name><surname>Parra</surname> <given-names>J. L.</given-names></name> <name><surname>Jones</surname> <given-names>P. G.</given-names></name> <name><surname>Jarvis</surname> <given-names>A.</given-names></name></person-group> (<year>2005</year>). <article-title>Very high resolution interpolated climate surfaces for global land areas</article-title>. <source>Int. J. Climatol.</source> <volume>25</volume>, <fpage>1965</fpage>&#x2013;<lpage>1978</lpage>. doi: <pub-id pub-id-type="doi">10.1002/joc.1276</pub-id></citation></ref>
<ref id="ref15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>X.</given-names></name> <name><surname>Kurata</surname> <given-names>N.</given-names></name> <name><surname>Wang</surname> <given-names>Z.-X.</given-names></name> <name><surname>Wang</surname> <given-names>A.</given-names></name> <name><surname>Zhao</surname> <given-names>Q.</given-names></name> <name><surname>Zhao</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>A map of rice genome variation reveals the origin of cultivated rice</article-title>. <source>Nature</source> <volume>490</volume>, <fpage>497</fpage>&#x2013;<lpage>501</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature11532</pub-id>, PMID: <pub-id pub-id-type="pmid">23034647</pub-id></citation></ref>
<ref id="ref16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hufford</surname> <given-names>M. B.</given-names></name> <name><surname>Xu</surname> <given-names>X.</given-names></name> <name><surname>Van Heerwaarden</surname> <given-names>J.</given-names></name> <name><surname>Pyh&#x00E4;j&#x00E4;rvi</surname> <given-names>T.</given-names></name> <name><surname>Chia</surname> <given-names>J.-M.</given-names></name> <name><surname>Cartwright</surname> <given-names>R. A.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Comparative population genomics of maize domestication and improvement</article-title>. <source>Nat. Genet.</source> <volume>44</volume>, <fpage>808</fpage>&#x2013;<lpage>811</lpage>. doi: <pub-id pub-id-type="doi">10.1038/ng.2309</pub-id>, PMID: <pub-id pub-id-type="pmid">22660546</pub-id></citation></ref>
<ref id="ref17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kan</surname> <given-names>N. C.</given-names></name></person-group> (<year>2013</year>). <article-title>Study on the origin of tea tree and the distribution of wild tea tree in Sichuan and Chongqing</article-title>. <source>Southwest J. Agr.</source> <volume>26</volume>, <fpage>382</fpage>&#x2013;<lpage>385</lpage>. doi: <pub-id pub-id-type="doi">10.16213/j.cnki.scjas.2013.01.056</pub-id></citation></ref>
<ref id="ref18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kingdom-Ward</surname> <given-names>F.</given-names></name></person-group> (<year>1950</year>). <article-title>Does wild tea exist?</article-title> <source>Nature</source> <volume>165</volume>, <fpage>297</fpage>&#x2013;<lpage>299</lpage>. doi: <pub-id pub-id-type="doi">10.1038/165297a0</pub-id></citation></ref>
<ref id="ref19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>T.-H.</given-names></name> <name><surname>Guo</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Kim</surname> <given-names>C.</given-names></name> <name><surname>Paterson</surname> <given-names>A. H.</given-names></name></person-group> (<year>2014</year>). <article-title>SNPhylo: a pipeline to construct a phylogenetic tree from huge SNP data</article-title>. <source>BMC Genomics</source> <volume>15</volume>, <fpage>1</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2164-15-162</pub-id></citation></ref>
<ref id="ref20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Durbin</surname> <given-names>R.</given-names></name></person-group> (<year>2009</year>). <article-title>Fast and accurate short read alignment with Burrows&#x2013;Wheeler transform</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>1754</fpage>&#x2013;<lpage>1760</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btp324</pub-id>, PMID: <pub-id pub-id-type="pmid">19451168</pub-id></citation></ref>
<ref id="ref21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name> <name><surname>Handsaker</surname> <given-names>B.</given-names></name> <name><surname>Wysoker</surname> <given-names>A.</given-names></name> <name><surname>Fennell</surname> <given-names>T.</given-names></name> <name><surname>Ruan</surname> <given-names>J.</given-names></name> <name><surname>Homer</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>2078</fpage>&#x2013;<lpage>2079</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id>, PMID: <pub-id pub-id-type="pmid">19505943</pub-id></citation></ref>
<ref id="ref22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liang</surname> <given-names>Z.</given-names></name> <name><surname>Duan</surname> <given-names>S.</given-names></name> <name><surname>Sheng</surname> <given-names>J.</given-names></name> <name><surname>Zhu</surname> <given-names>S.</given-names></name> <name><surname>Ni</surname> <given-names>X.</given-names></name> <name><surname>Shao</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Whole-genome resequencing of 472 Vitis accessions for grapevine diversity and demographic history analyses</article-title>. <source>Nat. Commun.</source> <volume>10</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-019-09135-8</pub-id></citation></ref>
<ref id="ref23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Malomane</surname> <given-names>D. K.</given-names></name> <name><surname>Reimer</surname> <given-names>C.</given-names></name> <name><surname>Weigend</surname> <given-names>S.</given-names></name> <name><surname>Weigend</surname> <given-names>A.</given-names></name> <name><surname>Sharifi</surname> <given-names>A. R.</given-names></name> <name><surname>Simianer</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>Efficiency of different strategies to mitigate ascertainment bias when using SNP panels in diversity studies</article-title>. <source>BMC Genomics</source> <volume>19</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s12864-017-4416-9</pub-id></citation></ref>
<ref id="ref24"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Mao</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <source>Analysis of genetic diversity and genetic structure of ancient tea trees in Lincang</source>, <publisher-loc>Yunnan (China)</publisher-loc>: <publisher-name>Chinese Academy of Agricultural Sciences</publisher-name>.</citation></ref>
<ref id="ref25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>McKenna</surname> <given-names>A.</given-names></name> <name><surname>Hanna</surname> <given-names>M.</given-names></name> <name><surname>Banks</surname> <given-names>E.</given-names></name> <name><surname>Sivachenko</surname> <given-names>A.</given-names></name> <name><surname>Cibulskis</surname> <given-names>K.</given-names></name> <name><surname>Kernytsky</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>The genome analysis toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data</article-title>. <source>Genome Res.</source> <volume>20</volume>, <fpage>1297</fpage>&#x2013;<lpage>1303</lpage>. doi: <pub-id pub-id-type="doi">10.1101/gr.107524.110</pub-id>, PMID: <pub-id pub-id-type="pmid">20644199</pub-id></citation></ref>
<ref id="ref26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mondal</surname> <given-names>T. K.</given-names></name> <name><surname>Bhattacharya</surname> <given-names>A.</given-names></name> <name><surname>Laxmikumaran</surname> <given-names>M.</given-names></name> <name><surname>Ahuja</surname> <given-names>P. S.</given-names></name></person-group> (<year>2004</year>). <article-title>Recent advances of tea (Camellia sinensis) biotechnology</article-title>. <source>Plant Cell Tiss. Org. Cult.</source> <volume>76</volume>, <fpage>195</fpage>&#x2013;<lpage>254</lpage>. doi: <pub-id pub-id-type="doi">10.1023/B:TICU.0000009254.87882.71</pub-id></citation></ref>
<ref id="ref27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ochanda</surname> <given-names>S. O.</given-names></name> <name><surname>Wanyoko</surname> <given-names>J. K.</given-names></name> <name><surname>Ruto</surname> <given-names>H. K.</given-names></name></person-group> (<year>2015</year>). <article-title>Effect of spices on consumer acceptability of purple tea (Camellia sinensis)</article-title>. <source>Food Nutr. Sci.</source> <volume>6</volume>, <fpage>703</fpage>&#x2013;<lpage>711</lpage>. doi: <pub-id pub-id-type="doi">10.4236/fns.2015.68073</pub-id></citation></ref>
<ref id="ref28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pickrell</surname> <given-names>J. K.</given-names></name> <name><surname>Pritchard</surname> <given-names>J. K.</given-names></name></person-group> (<year>2012</year>). <article-title>Inference of population splits and mixtures from genome-wide allele frequency data</article-title>. <source>PLoS Genet.</source> <volume>8</volume>:<fpage>e1002967</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pgen.1002967</pub-id></citation></ref>
<ref id="ref29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Purcell</surname> <given-names>S.</given-names></name> <name><surname>Neale</surname> <given-names>B.</given-names></name> <name><surname>Todd-Brown</surname> <given-names>K.</given-names></name> <name><surname>Thomas</surname> <given-names>L.</given-names></name> <name><surname>Ferreira</surname> <given-names>M. A.</given-names></name> <name><surname>Bender</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>PLINK: a tool set for whole-genome association and population-based linkage analyses</article-title>. <source>Am. J. Hum. Genet.</source> <volume>81</volume>, <fpage>559</fpage>&#x2013;<lpage>575</lpage>. doi: <pub-id pub-id-type="doi">10.1086/519795</pub-id>, PMID: <pub-id pub-id-type="pmid">17701901</pub-id></citation></ref>
<ref id="ref30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ren</surname> <given-names>G.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Ridout</surname> <given-names>K.</given-names></name> <name><surname>Serrano-Serrano</surname> <given-names>M. L.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Large-scale whole-genome resequencing unravels the domestication history of Cannabis sativa</article-title>. <source>Sci. Adv.</source> <volume>7</volume>:<fpage>eabg2286</fpage>. doi: <pub-id pub-id-type="doi">10.1126/sciadv.abg2286</pub-id>, PMID: <pub-id pub-id-type="pmid">34272249</pub-id></citation></ref>
<ref id="ref31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schiffels</surname> <given-names>S.</given-names></name> <name><surname>Durbin</surname> <given-names>R.</given-names></name></person-group> (<year>2014</year>). <article-title>Inferring human population size and separation history from multiple genome sequences</article-title>. <source>Nat. Genet.</source> <volume>46</volume>, <fpage>919</fpage>&#x2013;<lpage>925</lpage>. doi: <pub-id pub-id-type="doi">10.1038/ng.3015</pub-id>, PMID: <pub-id pub-id-type="pmid">24952747</pub-id></citation></ref>
<ref id="ref32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Slatkin</surname> <given-names>M.</given-names></name></person-group> (<year>2008</year>). <article-title>Linkage disequilibrium&#x2014;understanding the evolutionary past and mapping the medical future</article-title>. <source>Nat. Rev. Genet.</source> <volume>9</volume>, <fpage>477</fpage>&#x2013;<lpage>485</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nrg2361</pub-id>, PMID: <pub-id pub-id-type="pmid">18427557</pub-id></citation></ref>
<ref id="ref33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Feng</surname> <given-names>H.</given-names></name> <name><surname>Chang</surname> <given-names>Y.</given-names></name> <name><surname>Ma</surname> <given-names>C.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Hao</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Population sequencing enhances understanding of tea plant evolution</article-title>. <source>Nat. Commun.</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-020-18228-8</pub-id></citation></ref>
<ref id="ref34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>K.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Hakonarson</surname> <given-names>H.</given-names></name></person-group> (<year>2010</year>). <article-title>ANNOVAR: functional annotation of genetic variants from high-throughput sequencing data</article-title>. <source>Nucleic Acids Res.</source> <volume>38</volume>:<fpage>e164</fpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkq603</pub-id>, PMID: <pub-id pub-id-type="pmid">20601685</pub-id></citation></ref>
<ref id="ref35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wei</surname> <given-names>C.</given-names></name> <name><surname>Yang</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Zhao</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Gao</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Draft genome sequence of Camellia sinensis var. sinensis provides insights into the evolution of the tea genome and tea quality</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>115</volume>, <fpage>E4151</fpage>&#x2013;<lpage>E4158</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1719622115</pub-id></citation></ref>
<ref id="ref36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weir</surname> <given-names>B. S.</given-names></name> <name><surname>Hill</surname> <given-names>W. G.</given-names></name></person-group> (<year>2002</year>). <article-title>Estimating F-statistics</article-title>. <source>Annu. Rev. Genet.</source> <volume>36</volume>, <fpage>721</fpage>&#x2013;<lpage>750</lpage>. doi: <pub-id pub-id-type="doi">10.1146/annurev.genet.36.050802.093940</pub-id>, PMID: <pub-id pub-id-type="pmid">12359738</pub-id></citation></ref>
<ref id="ref37"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Willson</surname> <given-names>K. C.</given-names></name> <name><surname>Clifford</surname> <given-names>M. N.</given-names></name></person-group> (<year>2012</year>). <source>Tea: Cultivation to Consumption</source>. <publisher-loc>Berlin/Heidelberg</publisher-loc>: <publisher-name>Springer Science &#x0026; Business Media</publisher-name>.</citation></ref>
<ref id="ref38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xia</surname> <given-names>E.</given-names></name> <name><surname>Tong</surname> <given-names>W.</given-names></name> <name><surname>Hou</surname> <given-names>Y.</given-names></name> <name><surname>An</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>L.</given-names></name> <name><surname>Wu</surname> <given-names>Q.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>The reference genome of tea plant and resequencing of 81 diverse accessions provide insights into its genome evolution and adaptation</article-title>. <source>Mol. Plant</source> <volume>13</volume>, <fpage>1013</fpage>&#x2013;<lpage>1026</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.molp.2020.04.010</pub-id>, PMID: <pub-id pub-id-type="pmid">32353625</pub-id></citation></ref>
<ref id="ref39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xia</surname> <given-names>E.-H.</given-names></name> <name><surname>Zhang</surname> <given-names>H.-B.</given-names></name> <name><surname>Sheng</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>Q.-J.</given-names></name> <name><surname>Kim</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>The tea tree genome provides insights into tea flavor and independent evolution of caffeine biosynthesis</article-title>. <source>Mol. Plant</source> <volume>10</volume>, <fpage>866</fpage>&#x2013;<lpage>877</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.molp.2017.04.002</pub-id>, PMID: <pub-id pub-id-type="pmid">28473262</pub-id></citation></ref>
<ref id="ref40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Lee</surname> <given-names>S. H.</given-names></name> <name><surname>Goddard</surname> <given-names>M. E.</given-names></name> <name><surname>Visscher</surname> <given-names>P. M.</given-names></name></person-group> (<year>2011</year>). <article-title>GCTA: a tool for genome-wide complex trait analysis</article-title>. <source>Am. J. Hum. Genet.</source> <volume>88</volume>, <fpage>76</fpage>&#x2013;<lpage>82</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ajhg.2010.11.011</pub-id>, PMID: <pub-id pub-id-type="pmid">21167468</pub-id></citation></ref>
<ref id="ref41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Dong</surname> <given-names>S.-S.</given-names></name> <name><surname>Xu</surname> <given-names>J.-Y.</given-names></name> <name><surname>He</surname> <given-names>W.-M.</given-names></name> <name><surname>Yang</surname> <given-names>T.-L.</given-names></name></person-group> (<year>2019</year>). <article-title>PopLDdecay: a fast and effective tool for linkage disequilibrium decay analysis based on variant call format files</article-title>. <source>Bioinformatics</source> <volume>35</volume>, <fpage>1786</fpage>&#x2013;<lpage>1788</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/bty875</pub-id>, PMID: <pub-id pub-id-type="pmid">30321304</pub-id></citation></ref>
<ref id="ref42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Q.-J.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Li</surname> <given-names>K.</given-names></name> <name><surname>Nan</surname> <given-names>H.</given-names></name> <name><surname>Shi</surname> <given-names>C.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2020a</year>). <article-title>The chromosome-level reference genome of tea tree unveils recent bursts of non-autonomous LTR retrotransposons in driving genome size evolution</article-title>. <source>Mol. Plant</source> <volume>13</volume>, <fpage>935</fpage>&#x2013;<lpage>938</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.molp.2020.04.009</pub-id>, PMID: <pub-id pub-id-type="pmid">32353626</pub-id></citation></ref>
<ref id="ref43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Rong</surname> <given-names>J.</given-names></name> <name><surname>Wei</surname> <given-names>C.</given-names></name> <name><surname>Gao</surname> <given-names>L.</given-names></name> <name><surname>Chen</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Domestication origin and spread of cultivated tea plants</article-title>. <source>Biodivers. Sci.</source> <volume>26</volume>, <fpage>357</fpage>&#x2013;<lpage>372</lpage>. doi: <pub-id pub-id-type="doi">10.17520/biods.2018006</pub-id></citation></ref>
<ref id="ref44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Qiu</surname> <given-names>H.</given-names></name> <name><surname>Guo</surname> <given-names>Y.</given-names></name> <name><surname>Wan</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2020b</year>). <article-title>Genome assembly of wild tea tree DASZ reveals pedigree and selection history of tea varieties</article-title>. <source>Nat. Commun.</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-020-17498-6</pub-id></citation></ref>
<ref id="ref45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>H.</given-names></name> <name><surname>Sun</surname> <given-names>S.</given-names></name> <name><surname>Ding</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Yue</surname> <given-names>X.</given-names></name> <name><surname>Du</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Analysis of 427 genomes reveals moso bamboo population structure and genetic basis of property traits</article-title>. <source>Nat. Commun.</source> <volume>12</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-021-25795-x</pub-id></citation></ref>
<ref id="ref46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>D.-W.</given-names></name> <name><surname>Yang</surname> <given-names>J.-B.</given-names></name> <name><surname>Yang</surname> <given-names>S.-X.</given-names></name> <name><surname>Kato</surname> <given-names>K.</given-names></name> <name><surname>Luo</surname> <given-names>J.-P.</given-names></name></person-group> (<year>2014</year>). <article-title>Genetic diversity and domestication origin of tea plant Camellia taliensis (Theaceae) as revealed by microsatellite markers</article-title>. <source>BMC Plant Biol.</source> <volume>14</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2229-14-14</pub-id></citation></ref>
<ref id="ref47"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>J.-G.</given-names></name> <name><surname>Zhu</surname> <given-names>Y.-X.</given-names></name></person-group> (<year>2007</year>). <source>Introduction to Tea Science</source>. <publisher-loc>Beijing</publisher-loc>: <publisher-name>China Chinese Medicine Press</publisher-name>.</citation></ref>
<ref id="ref48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>Z.</given-names></name> <name><surname>Jiang</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Gou</surname> <given-names>Z.</given-names></name> <name><surname>Lyu</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Resequencing 302 wild and cultivated accessions identifies genes related to domestication and improvement in soybean</article-title>. <source>Nat. Biotechnol.</source> <volume>33</volume>, <fpage>408</fpage>&#x2013;<lpage>414</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nbt.3096</pub-id>, PMID: <pub-id pub-id-type="pmid">25643055</pub-id></citation></ref>
</ref-list>
<fn-group><fn id="fn0005"><p><sup>1</sup><ext-link xlink:href="https://sourceforge.net/projects/picard/" ext-link-type="uri">https://sourceforge.net/projects/picard/</ext-link> (Accessed August 26, 2022).</p></fn><fn id="fn0006"><p><sup>2</sup><ext-link xlink:href="https://gatk.broadinstitute.org/" ext-link-type="uri">https://gatk.broadinstitute.org/</ext-link> (Accessed August 26, 2022).</p></fn>
<fn id="fn0007"><p><sup>3</sup><ext-link xlink:href="http://itol.embl.de" ext-link-type="uri">http://itol.embl.de</ext-link> (Accessed August 26, 2022).</p></fn>
<fn id="fn0008"><p><sup>4</sup><ext-link xlink:href="https://www.worldclim.org/" ext-link-type="uri">https://www.worldclim.org/</ext-link> (Accessed August 26, 2022).</p></fn>
<fn id="fn0009"><p><sup>5</sup><ext-link xlink:href="http://cran.r-project.org/web/packages/geosphere/index.html" ext-link-type="uri">http://cran.r-project.org/web/packages/geosphere/index.html</ext-link> (Accessed August 26, 2022).</p></fn>
<fn id="fn0010"><p><sup>6</sup><ext-link xlink:href="http://CRAN.R-project.org/package=vegan" ext-link-type="uri">http://CRAN.R-project.org/package=vegan</ext-link> (Accessed August 26, 2022).</p></fn>
<fn id="fn0011"><p><sup>7</sup><ext-link xlink:href="https://github.com/stschiff/msmc2" ext-link-type="uri">https://github.com/stschiff/msmc2</ext-link> (Accessed August 26, 2022).</p></fn>
<fn id="fn0012"><p><sup>8</sup><ext-link xlink:href="https://github.com/stschiff/msmc-tools" ext-link-type="uri">https://github.com/stschiff/msmc-tools</ext-link> (Accessed August 26, 2022).</p></fn></fn-group>
</back>
</article>