<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2022.996265</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Genome-wide characterization and sequence polymorphism analyses of cysteine-rich poly comb-like protein in <italic>Glycine max</italic></article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Nisar</surname> <given-names>Tayyaba</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Tahir</surname> <given-names>Muhammad Hammad Nadeem</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Iqbal</surname> <given-names>Shahid</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1957944/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Sajjad</surname> <given-names>Muhammad</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/403405/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Nadeem</surname> <given-names>Muhammad Azhar</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/446948/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Qanmber</surname> <given-names>Ghulam</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1241318/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Baig</surname> <given-names>Ayesha</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1888326/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Khan</surname> <given-names>Zulqurnain</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1285639/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhao</surname> <given-names>Zhengyun</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Geng</surname> <given-names>Zhide</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x002A;</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Ur Rehman</surname> <given-names>Shoaib</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1209984/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Institute of Plant Breeding and Biotechnology, Muhammad Nawaz Shareef (MNS) University of Agriculture</institution>, <addr-line>Multan</addr-line>, <country>Pakistan</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Biosciences, Commission on Science and Technology for Sustainable Development in the South (COMSATS) University Islamabad</institution>, <addr-line>Islamabad</addr-line>, <country>Pakistan</country></aff>
<aff id="aff3"><sup>3</sup><institution>Faculty of Agricultural Sciences and Technologies, Sivas University of Science and Technology</institution>, <addr-line>Sivas</addr-line>, <country>Turkey</country></aff>
<aff id="aff4"><sup>4</sup><institution>State Key Laboratory of Cotton Biology, Cotton Research Institute of Chinese Academy of Agricultural Sciences</institution>, <addr-line>Anyang</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>Department of Biotechnology, Commission on Science and Technology for Sustainable Development in the South (COMSATS), University Islamabad, Abbottabad Campus</institution>, <addr-line>Abbottabad</addr-line>, <country>Pakistan</country></aff>
<aff id="aff6"><sup>6</sup><institution>Institute of Food Crops, Yunnan Academy of Agricultural Sciences</institution>, <addr-line>Kunming</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Engin Yol, Akdeniz University, Turkey</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Nastaran Mehri, Ardebil Agriculture and Natural Resources Research Center (AREEO), Iran; Shengwu Hu, Northwest A&#x0026;F University, China; Sunny Ahmar, University of Silesia in Katowice, Poland</p></fn>
<corresp id="c001">&#x002A;Correspondence: Shoaib Ur Rehman, <email>shoaib.rehman@mnsuam.edu.pk</email></corresp>
<corresp id="c002">Zhide Geng, <email>gengzd2002@163.com</email></corresp>
<fn fn-type="other" id="fn004"><p>This article was submitted to Plant Breeding, a section of the journal Frontiers in Plant Science</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>20</day>
<month>09</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>996265</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>07</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>08</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2022 Nisar, Tahir, Iqbal, Sajjad, Nadeem, Qanmber, Baig, Khan, Zhao, Geng and Ur Rehman.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Nisar, Tahir, Iqbal, Sajjad, Nadeem, Qanmber, Baig, Khan, Zhao, Geng and Ur Rehman</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Cysteine-rich polycomb-like protein (<italic>CPP</italic>) is a member of cysteine-rich transcription factors that regulates plant growth and development. In the present work, we characterized twelve <italic>CPP</italic> transcription factor-encoding genes in soybean (<italic>Glycine max</italic>). Phylogenetic analyses classified <italic>CPP</italic> genes into six clades. Sequence logo analyses between <italic>G. max</italic> and <italic>G. soja</italic> amino acid residues exhibited high conservation. The presence of growth and stress-related <italic>cis</italic>-acting elements in the upstream regions of <italic>GmCPPs</italic> highlights their role in plant development and tolerance against abiotic stress. <italic>Ka/Ks</italic> levels showed that <italic>GmCPPs</italic> experienced limited selection pressure with limited functional divergence arising from segmental or whole genome duplication events. By using the pan-genome of soybean, a single nucleotide polymorphism was identified in <italic>GmCPP-6</italic>. To perform high throughput genotyping, a kompetitive allele-specific PCR (KASP) marker was developed. Association analyses indicated that the <italic>GmCPP-6-T</italic> allele of <italic>GmCPP-6</italic> (in exon region) was associated with higher thousand seed weight under both water regimes (well-water and water-limited). Taken together, these results provide vital information to further decipher the biological functions of <italic>CPP</italic> genes in soybean molecular breeding.</p>
</abstract>
<kwd-group>
<kwd>soybean</kwd>
<kwd>phylogenetic analyses</kwd>
<kwd>kompetitive allele specific PCR</kwd>
<kwd>association analyses</kwd>
<kwd>drought</kwd>
<kwd><italic>GmCPP</italic></kwd>
</kwd-group>
<counts>
<fig-count count="7"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="53"/>
<page-count count="13"/>
<word-count count="6487"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1" sec-type="intro">
<title>Introduction</title>
<p>Identification and genome-wide characterization of plant transcription factors (TFs) bear vital significance (<xref ref-type="bibr" rid="B25">Qanmber et al., 2019</xref>). In plants, TFs play a central role in various developmental processes as well as a stress response (<xref ref-type="bibr" rid="B10">Green et al., 1987</xref>).</p>
<p>Cysteine-rich polycomb-like proteins (<italic>CPP</italic>-like) belong to a small TF family characterized by the presence of one or two similar Cys-rich domains known as the CXC domain (also known as the CRC domain), and the TCR motif (<xref ref-type="bibr" rid="B7">Cvitanich et al., 2000</xref>; <xref ref-type="bibr" rid="B11">Hauser et al., 2000</xref>; <xref ref-type="bibr" rid="B35">Sijacic et al., 2011</xref>). Plants and animals contain members of this family, but prokaryotes, yeasts, and fungi lack them. CXC domains of <italic>CPP</italic>-like proteins are highly conserved in different genera and species (<xref ref-type="bibr" rid="B36">Song et al., 2000</xref>; <xref ref-type="bibr" rid="B1">Andersen et al., 2007</xref>). <italic>CPP</italic>-like genes are involved in plant development, and in cell division control. <italic>CPP</italic> TFs are a small gene family that includes tesmin/<italic>TSO1</italic>-like CXC (TCX) proteins (<xref ref-type="bibr" rid="B1">Andersen et al., 2007</xref>). In many plant species, <italic>CPP</italic> TFs have been discovered to have a variety of functions. <italic>TSO1</italic>, the first <italic>CPP</italic> TF, was identified and characterized in <italic>Arabidopsis thaliana</italic> using map-based cloning, and its biological functions were explored through mutant screening (<xref ref-type="bibr" rid="B29">Riechmann et al., 2000</xref>). The <italic>TSO1</italic> gene is mainly expressed in flowers, in developing ovules and microspores. The <italic>tso1</italic> mutants show deficiencies in karyokinesis and cytokinesis, as well as a loss of control over directional cellular expansion and coordination of adjacent cell growth (<xref ref-type="bibr" rid="B29">Riechmann et al., 2000</xref>; <xref ref-type="bibr" rid="B35">Sijacic et al., 2011</xref>).</p>
<p>Many <italic>CPP</italic> genes have been identified in various plant species, including <italic>A. thaliana</italic> (<xref ref-type="bibr" rid="B12">Hauser et al., 1998</xref>), <italic>Oryza sativa</italic> (<xref ref-type="bibr" rid="B50">Yang Z. et al., 2008</xref>), <italic>Zea mays</italic> (<xref ref-type="bibr" rid="B38">Song et al., 2016b</xref>), and <italic>Glycine max</italic> (<xref ref-type="bibr" rid="B51">Zhang et al., 2015</xref>). Cucumber (<italic>Cucumis sativus</italic>) plant is susceptible to abiotic stresses due to its high transpiration rate (<xref ref-type="bibr" rid="B53">Zhou et al., 2017</xref>). Gene expression of <italic>CsCPP</italic> genes is upregulated in response to abiotic stresses like salt, cold, drought, and ABA, suggesting that <italic>CsCPPs</italic> may play a role in abiotic stress responses (<xref ref-type="bibr" rid="B48">Yang et al., 2019</xref>).</p>
<p>Soybean (<italic>G. max</italic> L.) belongs to the leguminous family and is a prominent source of edible oil and is cultivated in different parts of the world (<xref ref-type="bibr" rid="B9">Fehr and Caviness, 1977</xref>; <xref ref-type="bibr" rid="B37">Song et al., 2016a</xref>). Soybean seed contains 35% protein and 18% oil contents (<xref ref-type="bibr" rid="B46">Wilson, 2004</xref>). Abiotic stress factors are major limiting elements affecting its yield and quality. The role of <italic>CPP</italic>-like protein has been reported in the growth and development of <italic>A. thaliana</italic>, <italic>O. sativa</italic>, <italic>Z. mays</italic>, and <italic>C. sativus</italic>. Although <italic>CPP</italic>-like genes have been identified in soybean, more work is required to further decipher their function in <italic>G. max</italic>. The availability of the soybean pan-genome is expected to pave the way for molecular breeding in soybean (<xref ref-type="bibr" rid="B33">Schmutz et al., 2010</xref>). Although molecular markers are available, their deployment in soybean molecular breeding remains limited because of cost ineffectiveness while exploring large populations. Kompetitive Allele Specific PCR (KASP) is a high-throughput and breeder-friendly genotyping platform (<xref ref-type="bibr" rid="B24">Neelam et al., 2013</xref>). KASP offers cost-effective genotyping by eliminating the need for post-PCR handling (<xref ref-type="bibr" rid="B22">Majeed et al., 2018</xref>).</p>
<p>In this study, we characterized twelve <italic>GmCPP</italic> genes and performed systematic analyses using genome-wide structure depiction and sequence polymorphism investigations. We analyzed <italic>GmCPPs</italic> to explore evolutionary relationships, gene structure, conserved motifs, gene duplication, and association of sequence polymorphism with the studied soybean phenotypic traits under well-water (WW) and water limited (WL) conditions. The present work will assist to underpin the evolution of <italic>GmCPPs</italic> and provide information on <italic>GmCPP</italic> genes to be used in soybean molecular breeding.</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="S2.SS1">
<title>Sequence identification</title>
<p>The <italic>CPP</italic> genes and encoded proteins in various species like <italic>G. max, O. sativa</italic>, <italic>Z. mays</italic>, <italic>A. thaliana</italic>, <italic>Brassica rapa, G. soja, Cajanus cajan</italic>, <italic>Chlamydomonas reinhardtii</italic>, and <italic>Selaginella moellendorffii</italic> were downloaded from the plant transcription factor database.<sup><xref ref-type="fn" rid="footnote1">1</xref></sup> To confirm the retrieved CPP proteins, local BLASTp, NCBI Batch CD-search, Interproscan V. 63<sup><xref ref-type="fn" rid="footnote2">2</xref></sup> and SMART<sup><xref ref-type="fn" rid="footnote3">3</xref></sup> were also used. Non-redundant gene members were selected and the rest were excluded from further analyses. Other biophysical characteristics, i.e., protein length, molecular weight (MW), isoelectric point (pI), and grand average of hydropathicity (GRAVY) values for <italic>GmCPP</italic>s were extracted using ExPASy Protparam Tool.<sup><xref ref-type="fn" rid="footnote4">4</xref></sup> Furthermore, sub-cellular localization of <italic>GmCPPs</italic> was also identified using the Softberry<sup><xref ref-type="fn" rid="footnote5">5</xref></sup> and CELLO V2.5 web-tool.<sup><xref ref-type="fn" rid="footnote6">6</xref></sup></p>
</sec>
<sec id="S2.SS2">
<title>Sequence alignment and evolutionary analysis</title>
<p>Full-length amino acid sequences of all studied species were aligned and two phylogenetic trees were generated in MEGA X using the maximum likelihood (ML) method following the parameters reported by <xref ref-type="bibr" rid="B17">Kumar et al. (2018)</xref>. The bootstrap method (1,000 replications) was used to determine the dependability of clades. Graphical representation of multiple sequence alignment of conserved CPP amino acid residues in <italic>G. max</italic> and <italic>G. soja</italic> was performed separately by the Clustal W program (<xref ref-type="bibr" rid="B40">Tamura et al., 2011</xref>) and the WebLogo web tool.<sup><xref ref-type="fn" rid="footnote7">7</xref></sup></p>
</sec>
<sec id="S2.SS3">
<title>Gene structure, protein motif, and <italic>cis</italic>-element analyses</title>
<p>To explore exon/intron structure, bed-files from databases were obtained and analyzed using GSDS 2.0.<sup><xref ref-type="fn" rid="footnote8">8</xref></sup> Protein motif distributions were determined using the online MEME tool.<sup><xref ref-type="fn" rid="footnote9">9</xref></sup> For <italic>cis-</italic>element analyses, &#x223C;2 kb upstream regions were analyzed in the PlantCARE database (<xref ref-type="bibr" rid="B19">Lescot et al., 2002</xref>), the elements were characterized on the basis of their predicted biological functions, and graphical representation was done by using TBtools software.</p>
</sec>
<sec id="S2.SS4">
<title>Gene duplication and synteny analysis</title>
<p>To determine the chromosomal distribution of <italic>GmCPPs</italic>, extracted gff3-files of soybean genome annotation were downloaded from SoyBase (<ext-link ext-link-type="uri" xlink:href="http://SoyBase.org">SoyBase.org</ext-link>). Gene duplication analyses were performed following the methods as reported previously (<xref ref-type="bibr" rid="B49">Yang et al., 2017</xref>). CIRCOS was used to create the figure and <italic>Ka/Ks</italic> values were calculated using PAL2NAL (<xref ref-type="bibr" rid="B39">Suyama et al., 2006</xref>; <xref ref-type="bibr" rid="B16">Krzywinski et al., 2009</xref>).</p>
</sec>
<sec id="S2.SS5">
<title>Soybean plant material and phenotyping</title>
<p>A set of 46 soybean accessions was planted at MNS University of Agriculture, Multan in the spring of 2021 (2021-UAM). Field experiments were carried out under WL and WW experimental units following an Augmented design (Check = UAMSB200). The WL experimental units were subjected to drought especially at the flowering stage, whereas WW experimental units were irrigated every fortnight (depending upon water requirement). Each soybean genotype was planted on two beds (length &#x00D7; width = 15 &#x00D7; 2.5 ft) on both sides. Plant-to-plant distance was maintained at 1 ft. Phenotypic data were recorded for plant height, thousand seed weight, pods plant<sup>&#x2013;1</sup>, seeds pod<sup>&#x2013;1</sup>, seed weight pod<sup>&#x2013;1</sup>, seed length, seed thickness, seed width, and pod length from both water regimes.</p>
</sec>
<sec id="S2.SS6">
<title>Development of single nucleotide polymorphism based kompetitive allele-specific polymerase chain reaction markers for <italic>GmCPP</italic></title>
<p>Genomic DNA of the investigated soybean germplasm was extracted from young seedling leaves using the CTAB method (<xref ref-type="bibr" rid="B18">Lecharny et al., 2003</xref>). The quality of extracted DNA was initially checked by using NANO-Drop (K5800C Micro-Spectrophotometer) followed by running the extracted DNA on 1.0% agarose gel.</p>
<p>The whole genome sequences of three cultivars of soybean (Williams-82, Lee, and Zhonghuang-13) were downloaded from SoyBase.<sup><xref ref-type="fn" rid="footnote10">10</xref></sup> Local BLAST was performed to identify the sequences of <italic>GmCPPs</italic> in the aforementioned soybean genotypes. For the identification of sequence polymorphism, multiple sequence alignment was performed using the Seqman program in the DNASTAR Lasergene package. Standard kompetitive allele-specific PCR (KASP) guidelines<sup><xref ref-type="fn" rid="footnote11">11</xref></sup> were followed for the development of KASP primers on the identified single nucleotide polymorphism (SNP) of <italic>GmCPP-6</italic>. Allele-specific primers were developed having standard HEX and FAM tails with the targeted SNP at the 3&#x2032; end. Two reverse primers (allele-specific) and one common forward primer were designed so that the total fragment length was less than 100 bp. The standard KASP reaction mixture, KASP assay, and PCR conditions were followed as reported by <xref ref-type="bibr" rid="B28">Rasheed et al. (2016)</xref>, <xref ref-type="bibr" rid="B22">Majeed et al. (2018)</xref>, <xref ref-type="bibr" rid="B42">Ur Rehman et al. (2019</xref>, <xref ref-type="bibr" rid="B41">2021)</xref>, and <xref ref-type="bibr" rid="B15">Irshad et al. (2019</xref>, <xref ref-type="bibr" rid="B14">2021)</xref>.</p>
</sec>
<sec id="S2.SS7">
<title>Statistical analyses</title>
<p>Phenotypic data were analyzed with XLSTAT Software 2014. Student&#x2019;s <italic>t-</italic>test at <italic>p</italic> less than 0.05 was used to check the effect of each allelic variation on the recorded phenotypic traits.</p>
</sec>
</sec>
<sec id="S3" sec-type="results">
<title>Results</title>
<sec id="S3.SS1">
<title>Identification of cysteine-rich polycomb-like protein gene family members in different species</title>
<p>We identified a total of 81 <italic>CPP</italic> genes in nine investigated species including chlorophytes (<italic>C. reinhardtii</italic>), lycophytes (<italic>S. moellendorffii</italic>), <italic>Brassicaceae</italic> (<italic>A. thaliana</italic> and <italic>B. rapa</italic>), <italic>Fabaceae</italic> (<italic>G. max, G. soja</italic>, and <italic>C. cajan</italic>), and <italic>Poaceae</italic> (<italic>Z. mays</italic> and <italic>O. sativa</italic>). Among these, 12 <italic>CPP</italic> genes were shortlisted in <italic>G. max</italic>, 10 in <italic>G. soja</italic>, 16 in <italic>B. rapa</italic>, 11 each in <italic>Z. mays</italic> and <italic>O. sativa</italic>, eight in <italic>A. thaliana</italic>, six in <italic>C. cajan</italic>, four in <italic>S. moellendorffii</italic>, and three in <italic>C. reinhardtii</italic>. A higher number of <italic>CPPs</italic> were identified in <italic>G. max</italic> as compared to chlorophytes and lycophytes indicating a duplication effect on <italic>GmCPPs</italic> in <italic>G. max</italic>. These findings also signify that <italic>CPPs</italic> experienced extension in higher plants. The transcription factor ID, taxonomic ID, and predicted sub-cellular localization are presented in <xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>. These results showed that the <italic>GmCPP</italic> coding sequence ranged from 1,656 to 2,715 bp for <italic>GmCPP-6</italic> and <italic>GmCPP-11</italic>, respectively. Similarly, the amino acid number of <italic>GmCPP</italic> proteins ranged from 483 to 904 for <italic>GmCPP-5</italic> and <italic>GmCPP-11</italic>, respectively. Molecular weight ranged from 54,034.67 to 98,476.32 Da for <italic>GmCPP-12</italic> and <italic>GmCPP-4</italic>, respectively. The isoelectric point of <italic>GmCPP-4</italic> was the highest (9.2) and that of <italic>GmCPP-1</italic> was the lowest (5.41). 
The grand average of hydropathicity values of all <italic>GmCPPs</italic> were less than zero and ranged from -0.734 for <italic>GmCPP-11</italic> to -0.511 for <italic>GmCPP-4</italic>. In addition, all <italic>GmCPPs</italic> are localized in the nucleus, except <italic>GmCPP-1</italic> and <italic>GmCPP-10</italic>.</p>
</sec>
<sec id="S3.SS2">
<title>Phylogenetic analyses of cysteine-rich polycomb-like protein gene family</title>
<p>The phylostratum analyses of <italic>CPP</italic> genes identified the primitive lineage as <italic>CPP</italic> genes, which were also identified in chlorophyte (<italic>C. reinhardtii</italic>) (<xref ref-type="fig" rid="F1">Figure 1</xref>). Further, the <italic>CPP</italic> genes were identified in lycophytes, dicots, and monocots. These outcomes signified that <italic>CPPs</italic> originated from an early plant phylostratum and possible orthologs are present throughout kingdom Plantae. An evolutionary tree was generated to determine the phylogenetic relationship among the studied <italic>CPPs</italic>. To indicate the <italic>CPPs</italic> from <italic>C. reinhardtii</italic>, <italic>S. moellendorffii</italic>, <italic>A. thaliana</italic>, <italic>B. rapa</italic>, <italic>G. max, G. soja</italic>, <italic>C. cajan, Z. mays</italic>, and <italic>O. sativa</italic> the prefixes Cr, Sm, At, Br, Gm, Gs, Cc, Zm, and Os were used correspondingly. The phylogenetic analyses divided 81 genes into six clades based on sequence similarities (<xref ref-type="fig" rid="F1">Figure 1</xref>). Clade-I comprised 16 members, Clade-II possessed 13 members, Clade-III contained 14 members, Clade-IV and Clade-V had 15 members each, and Clade-VI contained eight members. Clade-I lacks genes from chlorophytes which suggested the evolution of <italic>CPP</italic> genes after the split of chlorophyte. Interestingly, <italic>CPP</italic> genes from monocot and dicot species were unsystematically distributed to all clades. Further, phylogenetic analyses indicated that <italic>G. max</italic> and <italic>B. rapa</italic> experienced gene family expansion since both have more <italic>CPP</italic> genes compared to other studied organisms.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Phylogenetic tree of <italic>CPPs</italic> from nine different species. Phylostratum analyses of <italic>CPP</italic> gene family <bold>(Upper portion)</bold>. Phylogenetic and evolutionary relationship of <italic>CPP</italic> gene family in soybean and other plant species <bold>(Lower portion)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-996265-g001.tif"/>
</fig>
<p>To explore the conservation of each amino acid residue in GmCPP and GsCPP, multiple sequence alignment was executed to generate sequence logos in <italic>G. max</italic> and <italic>G. soja</italic>. The outcomes showed that the amino acid residue distribution was highly similar at most of the loci between <italic>G. max</italic> and <italic>G. soja</italic>. For example, some amino acid residues such as C [6], L [7], Y [8], C [9], C [11], F [12], A [13], N [29], A [34], and so on were found to be highly conserved (<xref ref-type="fig" rid="F2">Figure 2</xref>). Phylogenetic analyses also highlight that <italic>GmCPP</italic> and <italic>GsCPP</italic> members lie in close proximity in the evolutionary tree.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p>Sequence logo of GmCPP and GsCPP. Amino acid residues shared by two plant species are highly conserved. Each black letter showed the conserved amino acids at a given location.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-996265-g002.tif"/>
</fig>
</sec>
<sec id="S3.SS3">
<title>Gene structure, protein motif, and <italic>cis</italic>-acting element analysis</title>
<p>It has been well-documented that intron-exon distribution arrangement in a gene is related to its biological function. The intron number in <italic>GmCPP</italic> ranged from 7 to 9 (<xref ref-type="fig" rid="F3">Figure 3</xref>). Conserved domains in each sequence were identified using the CDD tool of NCBI.<sup><xref ref-type="fn" rid="footnote12">12</xref></sup> All members of the <italic>GmCPP</italic> gene family contain the TCR domain (<xref ref-type="supplementary-material" rid="FS1">Supplementary Figure 1</xref>). MEME tool was used to explore the conserved motif distributions of <italic>GmCPPs</italic>. The outcomes indicated that most of the <italic>GmCPP</italic> proteins exhibited similar motif distribution patterns such as motifs one, two, and eight exist in almost all proteins (<xref ref-type="fig" rid="F4">Figure 4</xref>). We also identified <italic>cis</italic>-acting elements in the upstream regions of <italic>GmCPPs</italic> and grouped them on the basis of their functional relevance. All <italic>GmCPPs</italic> had <italic>cis</italic>-acting elements related to plant development, stress, and light responses (<xref ref-type="supplementary-material" rid="TS2">Supplementary Table 2</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p>Gene structure display of <italic>GmCPP</italic>. Exon/intron structure display of <italic>GmCPP</italic> genes; Green color shows the upstream and downstream region, yellow color shows exons, black lines show introns.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-996265-g003.tif"/>
</fig>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p>Motifs in all <italic>GmCPP</italic> genes. Distribution of conserved motifs in <italic>GmCPP</italic> are presented in different colors.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-996265-g004.tif"/>
</fig>
</sec>
<sec id="S3.SS4">
<title>Chromosomal distribution, gene duplication, and synteny analyses</title>
<p>The 12 <italic>GmCPP</italic> genes are scattered on five chromosomes, including three genes each on chromosomes one and four. Two genes each are on chromosomes five and 10, while one gene is present on chromosome seven (<xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>). To investigate the relationship of gene pairs, we explored the gene locus on a chromosome and executed synteny analyses. The synteny analyses showed that <italic>GmCPP</italic> genes were highly conserved among five chromosomes (<xref ref-type="fig" rid="F5">Figure 5</xref>). Whole genome duplication, segmental duplication, and tandem duplication play a vital role in the extension of a gene family (<xref ref-type="bibr" rid="B49">Yang et al., 2017</xref>). To investigate the expansion of the <italic>GmCPP</italic> family in soybean, we executed gene duplication analyses in the soybean genome (<xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>). Out of all studied gene pairs, 10 gene pairs were attributed to segmental duplication. We also explored the non-synonymous divergence (<italic>Ka</italic>) versus synonymous (<italic>Ks</italic>) values for the <italic>GmCPP</italic> gene pairs. It was found that nine duplicated gene pairs showed <italic>Ka/Ks</italic> values &#x003C;0.5, whereas two duplicated gene pairs showed <italic>Ka/Ks</italic> values between 0.5 and 1.0 (<xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>). Generally, <italic>Ka/Ks</italic> of the studied gene pairs were &#x003C;1, showing that the <italic>GmCPP</italic> gene family experienced purifying selection pressure with restricted functional differences.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption><p>Gene duplication and synteny analysis. Gene duplication among <italic>GmCPP</italic> genes. Blue line shows orthologous/paralogous pair. Gm01 to Gm20 shows the chromosomes of <italic>Glycine max</italic>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-996265-g005.tif"/>
</fig>
</sec>
<sec id="S3.SS5">
<title>Marker trait association analysis</title>
<p>For <italic>GmCPP-6</italic>, a polymorphic site was identified in the coding region. KASP marker was developed at the SNP site. KASP assay results showed that soybean accession having HEX tail has &#x201C;C&#x201D; allele while accession having FAM tail has &#x201C;T&#x201D; type allele (<xref ref-type="fig" rid="F6">Figure 6</xref>).</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption><p>Gene structure and KASP marker development for <italic>GmCPP-6</italic>. Red-colored letters indicate sequence polymorphism. Nucleotides colored blue indicate FAM, whereas nucleotides colored red indicate HEX.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-996265-g006.tif"/>
</fig>
<p>Two allelic variations of <italic>GmCPP-6</italic>, i.e., <italic>GmCPP-6-T</italic> and <italic>GmCPP-6-C</italic> were identified in the studied soybean germplasm. <italic>GmCPP-6-C</italic> was the most frequently occurring allelic variation available in 58.6% of studied soybean accessions. Marker trait association analyses exhibited that at unique field sites, <italic>GmCPP-6-T</italic> was associated with higher thousand seed weight in both environments (<xref ref-type="fig" rid="F7">Figure 7</xref>).</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption><p>Phenotypic comparison of <italic>GmCPP-6</italic> allelic variations under well-watered (WW) and water-limited (WL) conditions. The two environments were at University of Agriculture Multan (UAM) under WW and WL conditions in year 2021. &#x002A;<italic>p</italic> less than 0.05. Error bars denote standard error.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-996265-g007.tif"/>
</fig>
</sec>
</sec>
<sec id="S4" sec-type="discussion">
<title>Discussion</title>
<p>Cysteine-rich polycomb-like protein TFs are quite a small gene family involved in plant growth and stress responses (<xref ref-type="bibr" rid="B1">Andersen et al., 2007</xref>; <xref ref-type="bibr" rid="B21">Lu et al., 2013</xref>; <xref ref-type="bibr" rid="B51">Zhang et al., 2015</xref>; <xref ref-type="bibr" rid="B52">Zhou et al., 2018</xref>; <xref ref-type="bibr" rid="B23">Nan et al., 2021</xref>). In earlier studies, identification of the <italic>CPP</italic> gene family in <italic>Camellia sinensis</italic>, <italic>A. thaliana</italic>, <italic>C. sativus</italic>, and <italic>G. max</italic> has been performed. But genome-wide characterization, in relation to analyses of sequence polymorphism, has not been performed in soybean. In the present work, a comprehensive identification, characterization, and analysis of sequence polymorphism of <italic>GmCPPs</italic> was performed.</p>
<sec id="S4.SS1">
<title>Soybean cysteine-rich polycomb-like proteins are conserved during evolution</title>
<p>In the present work, we identified 81 <italic>CPP</italic> genes in nine different organisms, i.e., chlorophytes (<italic>C. reinhardtii</italic>), lycophytes (<italic>S. moellendorffii</italic>), <italic>Brassicaceae</italic> (<italic>A. thaliana</italic> and <italic>B. rapa</italic>), <italic>Fabaceae</italic> (<italic>G. max, G. soja</italic>, and <italic>C. cajan</italic>), and <italic>Poaceae</italic> (<italic>Z. mays</italic> and <italic>O. sativa</italic>). Phylostratum analyses of <italic>CPPs</italic> indicated a primitive plant pedigree, as <italic>CPPs</italic> were present in chlorophytes, showing that <italic>CPP</italic> genes originated from early land plants and that orthologous genes of <italic>CPP</italic> exist across kingdom Plantae. Phylogenetic analyses were performed to establish the evolutionary relationship among the studied species. All <italic>CPP</italic> genes were divided into six different clades which showed that most of the <italic>G. max</italic> genes exhibited a close relationship with <italic>G. soja</italic> genes and indicated that both species share a common ancestry. The <italic>G. soja</italic> genome has been shown to have 0.9154 GB consensus sequences, covering &#x223C;98% of <italic>G. max</italic> genome sequence (<xref ref-type="bibr" rid="B33">Schmutz et al., 2010</xref>). Gene structure analyses showed that <italic>GmCPPs</italic> have a higher number of introns, which indicates that <italic>GmCPPs</italic> belong to the primitive gene family group. Sequence logos for conserved amino acid residues were also highly conserved in <italic>G. max</italic> and <italic>G. soja</italic>. Both N and C terminals of <italic>GmCPPs</italic> and <italic>GsCPPs</italic> are conserved. 
These results indicate that the <italic>GmCPP</italic> and <italic>GsCPP</italic> genes are evolutionarily conserved which might be helpful to underpin the pattern of <italic>CPP</italic> protein sequence conservation in other members of kingdom Plantae.</p>
</sec>
<sec id="S4.SS2">
<title>Biophysical characteristics</title>
<p>The estimates of biophysical parameters of all <italic>GmCPP</italic> gene family members delivered helpful information. Biophysical properties predicted that 10 out of 12 <italic>GmCPPs</italic> were positioned in the nucleus. The values of pI and grand average of hydropathicity of all <italic>GmCPPs</italic> indicated that all <italic>CPP</italic> proteins were hydrophilic (&#x003C;0) and alkaline (&#x223C;7) (<xref ref-type="supplementary-material" rid="TS4">Supplementary Table 4</xref>).</p>
</sec>
<sec id="S4.SS3">
<title>Exon-intron and motif analyses</title>
<p>The structure of the gene is a vital component that might be contributed by deletion and/or insertion incidents (<xref ref-type="bibr" rid="B18">Lecharny et al., 2003</xref>). In the past, genome-wide studies have demonstrated that the loss or gain of introns during eukaryotic divergence was widespread (<xref ref-type="bibr" rid="B30">Rogozin et al., 2003</xref>; <xref ref-type="bibr" rid="B31">Roy and Penny, 2007</xref>). Gene structure analyses showed that all <italic>GmCPP</italic> genes have varied intron lengths that might play crucial roles in the functional divergence of <italic>GmCPP</italic> genes. It has been well-documented that introns play an important role in the evolution of different species (<xref ref-type="bibr" rid="B45">William Roy and Gilbert, 2006</xref>). In the current study, we observed that the number of introns for <italic>GmCPPs</italic> ranged from seven to nine indicating that <italic>G. max</italic> has evolved a long time ago (&#x003E;Million years). Roy and Gilbert also advocated that earlier evolved species have more introns as compared with the newly evolved species (<xref ref-type="bibr" rid="B45">William Roy and Gilbert, 2006</xref>). Ten motifs were identified which showed that <italic>CPP</italic> proteins might function in different biological pathways allied with other co-factors. The motif distribution pattern of <italic>CPP</italic> proteins indicated that the distribution was relatively conserved and minimal divergence among the proteins from different groups might be linked with the specific biological function associated with soybean development and stress tolerance.</p>
<p>Transcription is governed by the binding of TFs to promoter <italic>cis</italic>-acting regulatory elements. Various studies have reported the crucial role of <italic>cis</italic>-acting elements in the processes of plant growth and stress responses (<xref ref-type="bibr" rid="B8">Fankhauser and Chory, 1997</xref>). In this study, <italic>cis</italic>-acting elements related to plant development and stress responses were identified in the upstream region of <italic>GmCPPs</italic>.</p>
</sec>
<sec id="S4.SS4">
<title>Gene duplication and selection pressure</title>
<p>The uneven distribution of <italic>GmCPP</italic> genes on chromosomes of <italic>G. max</italic> shows probable gene loss or addition through whole genome or segmental duplication incidents. It has been reported that gene duplication and divergence generally lead toward evolution (<xref ref-type="bibr" rid="B5">Chothia et al., 2003</xref>). Gene duplication creates functional differences, which is crucial for speciation and adaptableness in changing environmental conditions (<xref ref-type="bibr" rid="B6">Conant and Wolfe, 2008</xref>). Gene duplication indicates that the aligned sequences share &#x003E; 70% similarity and coverage length &#x003E; 80% of the entire length (<xref ref-type="bibr" rid="B47">Yang S. et al., 2008</xref>). The two duplicated genes present on the different chromosomes of the same sub-genome might be the consequence of segmental or whole genome duplication, whereas, their presence on the same chromosomes might be the consequence of tandem duplication (<xref ref-type="bibr" rid="B13">He et al., 2012</xref>). Tandemly duplicated genes tend to be positioned together on chromosomes whereas, in segmental or whole genome duplication, the duplicated genes are generally distributed throughout the genome (<xref ref-type="bibr" rid="B32">Schauser et al., 2005</xref>). Approximately 65 million years ago whole genome and segmental duplications in primitive plant species contributed to the expansion of a number of gene families (<xref ref-type="bibr" rid="B2">Barakat et al., 2009</xref>; <xref ref-type="bibr" rid="B44">Wang et al., 2013</xref>) and contributed genomic complexity to kingdom Plantae (<xref ref-type="bibr" rid="B4">Cannon et al., 2004</xref>).</p>
<p>In the present work, we characterized 12 <italic>GmCPP</italic> genes, three times the number of <italic>CPPs</italic> present in chlorophytes, which indicates that <italic>CPP</italic> experienced expansion during their evolution. As reported previously, expansion in the genome permitted many crop plant species to acclimatize to environmental conditions (<xref ref-type="bibr" rid="B26">Ramsey and Schemske, 1998</xref>). We noticed that segmental and whole genome duplication were the major reasons responsible for the expansion of the <italic>CPP</italic> gene family in <italic>G. max</italic>. Segmental type duplication is the main contributor during evolution and it has happened in numerous plant genomes which contain many duplicated chromosomal blocks (<xref ref-type="bibr" rid="B4">Cannon et al., 2004</xref>). For instance, many <italic>A. thaliana</italic> gene families experienced evolutionary dynamics that led toward gene family expansion (<xref ref-type="bibr" rid="B3">Baumberger et al., 2003</xref>; <xref ref-type="bibr" rid="B43">Wang et al., 2008</xref>). Our results showed that <italic>GmCPPs</italic> grouped into pairs (Segmental duplication) which shows an ancient expansion in the gene family in <italic>G. max</italic>. To estimate the selection and environmental pressure, non-synonymous (<italic>Ka</italic>) and synonymous (<italic>Ks</italic>) rates of substitution (<italic>Ka</italic>/<italic>Ks</italic>) were computed. We noticed that <italic>Ka</italic>/<italic>Ks</italic> values of <italic>GmCPP</italic> genes were &#x003C;1 illustrating that the <italic>GmCPP</italic> gene family experienced strong purifying selection pressure.</p>
</sec>
<sec id="S4.SS5">
<title>Allelic variations influencing seed weight</title>
<p>Soybean PAN-genome might be helpful for bridging the phenotype to genotype gap in soybean breeding. Recently, PAN-genome has been used to discover genes for flowering time in <italic>G. soja</italic> (<xref ref-type="bibr" rid="B20">Li et al., 2014</xref>). Marker-assisted selection of elite alleles in breeding programs is vital for ongoing soybean breeding. The utilization of elite allelic variations in cultivars can be enriched if effective molecular platforms are available (<xref ref-type="bibr" rid="B27">Rasheed et al., 2017</xref>; <xref ref-type="bibr" rid="B22">Majeed et al., 2018</xref>; <xref ref-type="bibr" rid="B41">Ur Rehman et al., 2021</xref>). In this study, we used the PAN-genome of <italic>G. max</italic> to explore sequence polymorphism for <italic>GmCPP-6</italic>. Although sequence polymorphism was explored in all studied <italic>GmCPP</italic> genes, the allelic variation was only identified in the CDS region of <italic>GmCPP-6.</italic> Hence, all other genes were excluded from marker-trait association analyses. The absence of polymorphism in all other <italic>GmCPP</italic> genes is possibly due to allele fixation during evolution or because of the lower number of <italic>G. max</italic> accessions available for PAN-genomics studies. More work is required for further confirmation or to investigate these two possibilities. Converting sequence polymorphism to gel-free (KASP) markers enables SNPs to be more efficiently applied in selecting desirable alleles in marker-assisted breeding. Moreover, the KASP assay procedure is cost-effective. In the current study, soybean accessions having <italic>GmCPP-6-T</italic> had higher thousand seed weight under both environmental conditions, i.e., WW and WL. Moreover, RNA-Seq Atlas of <italic>G. max</italic> also reported higher expression of <italic>GmCPP-6</italic> in seeds (14&#x2013;25 days after fertilization) (<xref ref-type="bibr" rid="B34">Severin et al., 2010</xref>). 
Generally, yield-related parameters of crop plants are administered by several genes and are strongly influenced by external stimuli. Pyramiding of favorable alleles might be helpful for continued improvement in soybean. The developed molecular marker will be useful for marker-assisted breeding in soybean which can be used in combination with other molecular markers.</p>
</sec>
</sec>
<sec id="S5" sec-type="conclusion">
<title>Conclusion</title>
<p>Eighty-one <italic>CPP</italic> genes were studied in this research, and on the basis of phylogenetic analyses, all genes were divided into six sub-groups. The amino acid residues of <italic>G. max</italic> and <italic>G. soja</italic> demonstrated less conservation in web logos. Introns are present in <italic>GmCPP</italic> genes, and the pattern of protein motif distribution is less consistent across all proteins. Growth regulator <italic>cis</italic>-acting elements were found in the upstream regions of <italic>GmCPP</italic>, indicating their role in plant growth and development. Gene duplication and synteny analysis revealed that the <italic>GmCPP</italic> genes have undergone segmental and whole genome duplication during evolution, resulting in a significant expansion of the <italic>GmCPP</italic>. The current study also delivers a molecular marker associated with higher thousand seed weight in soybean. These findings lay the groundwork for further research into the roles of <italic>GmCPP</italic> genes in soybean growth, development, and response to external stimuli.</p>
</sec>
<sec id="S6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/<xref ref-type="supplementary-material" rid="FS1">Supplementary material</xref>.</p>
</sec>
<sec id="S7">
<title>Author contributions</title>
<p>TN, MT, SI, and SU conceived the idea. TN performed the experiment, analyzed the data, and wrote the original draft of the manuscript. MT, SI, MS, GQ, ZK, ZZ, and ZG guided in the execution of field and laboratory experiments. ZK, ZZ, and ZG assisted in the development of molecular markers. MT, SI, AB, and SU reviewed the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec id="S8" sec-type="funding-information">
<title>Funding</title>
<p>This research was funded by the Key Technology and Science Promotion Cooperation Base on Whole Industrial Chain of Bean, Project number: GHJD-2020025 and Punjab Agricultural Research Board, Project number: PARB-830.</p>
</sec>
<ack>
<p>The authors are grateful to the Director of University Farms Abdul Ghaffar and Deputy Director University Farms, Mr. Mahmood Alam Khan of MNSUAM for providing the facility to conduct field research. The authors are also grateful to the Graduate Resource Center of MNSUAM for providing training on the construction of high-resolution images for publication.</p>
</ack>
<sec id="S9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="S10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="S11" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2022.996265/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2022.996265/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Image_1.JPEG" id="FS1" mimetype="image/jpeg" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Figure 1</label>
<caption><p>Showing conserved domain in all <italic>GmCPP</italic> genes. Conserved domain in <italic>GmCPP</italic> genes. Yellow color shows TCR family domains.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Data_Sheet_1.xlsx" id="TS1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Table 1</label>
<caption><p>Transcription factor ID, taxonomic ID and predicted sub-cellular localization.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Data_Sheet_1.xlsx" id="TS2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Table 2</label>
<caption><p><italic>cis</italic>-acting elements related to plant development, stress, and light responses.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Data_Sheet_1.xlsx" id="TS3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Table 3</label>
<caption><p>Gene duplication, collinearity/synteny, and <italic>Ka/Ks</italic> values.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="Data_Sheet_1.xlsx" id="TS4" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Supplementary Table 4</label>
<caption><p>Biophysical properties of GmCPP.</p></caption>
</supplementary-material>
</sec>
<fn-group>
<fn id="footnote1">
<label>1</label>
<p><ext-link ext-link-type="uri" xlink:href="http://planttfdb.gao-lab.org/link.php">http://planttfdb.gao-lab.org/link.php</ext-link></p></fn>
<fn id="footnote2">
<label>2</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.ebi.ac.uk/InterProScan/">http://www.ebi.ac.uk/InterProScan/</ext-link></p></fn>
<fn id="footnote3">
<label>3</label>
<p><ext-link ext-link-type="uri" xlink:href="http://smart.embl-heidelberg.de/">http://smart.embl-heidelberg.de/</ext-link></p></fn>
<fn id="footnote4">
<label>4</label>
<p><ext-link ext-link-type="uri" xlink:href="http://us.expasy.org/tools/protparam.html">http://us.expasy.org/tools/protparam.html</ext-link></p></fn>
<fn id="footnote5">
<label>5</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.softberry.com/">http://www.softberry.com/</ext-link></p></fn>
<fn id="footnote6">
<label>6</label>
<p><ext-link ext-link-type="uri" xlink:href="https://mybiosoftware.com/cello-v-2-5-subcellular-localization-predictor.html">https://mybiosoftware.com/cello-v-2-5-subcellular-localization-predictor.html</ext-link></p></fn>
<fn id="footnote7">
<label>7</label>
<p><ext-link ext-link-type="uri" xlink:href="https://weblogo.berkeley.edu/logo.cgi">https://weblogo.berkeley.edu/logo.cgi</ext-link></p></fn>
<fn id="footnote8">
<label>8</label>
<p><ext-link ext-link-type="uri" xlink:href="http://gsds.gao-lab.org/">http://gsds.gao-lab.org/</ext-link></p></fn>
<fn id="footnote9">
<label>9</label>
<p><ext-link ext-link-type="uri" xlink:href="http://memesuite.org">http://memesuite.org</ext-link></p></fn>
<fn id="footnote10">
<label>10</label>
<p><ext-link ext-link-type="uri" xlink:href="https://bar.utoronto.ca/eplant/">https://bar.utoronto.ca/eplant/</ext-link></p></fn>
<fn id="footnote11">
<label>11</label>
<p><ext-link ext-link-type="uri" xlink:href="http://www.lgcgenomics.com">http://www.lgcgenomics.com</ext-link></p></fn>
<fn id="footnote12">
<label>12</label>
<p><ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi">https://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi</ext-link></p></fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Andersen</surname> <given-names>S. U.</given-names></name> <name><surname>Algreen-Petersen</surname> <given-names>R. G.</given-names></name> <name><surname>Hoedl</surname> <given-names>M.</given-names></name> <name><surname>Jurkiewicz</surname> <given-names>A.</given-names></name> <name><surname>Cvitanich</surname> <given-names>C.</given-names></name> <name><surname>Braunschweig</surname> <given-names>U.</given-names></name><etal/></person-group> (<year>2007</year>). <article-title>The conserved cysteine-rich domain of a tesmin/TSO1-like protein binds zinc in vitro and TSO1 is required for both male and female fertility in <italic>Arabidopsis thaliana</italic>.</article-title> <source><italic>J. Exp. Bot.</italic></source> <volume>58</volume> <fpage>3657</fpage>&#x2013;<lpage>3670</lpage>. <pub-id pub-id-type="doi">10.1093/jxb/erm215</pub-id> <pub-id pub-id-type="pmid">18057042</pub-id></citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Barakat</surname> <given-names>A.</given-names></name> <name><surname>Bagniewska-Zadworna</surname> <given-names>A.</given-names></name> <name><surname>Choi</surname> <given-names>A.</given-names></name> <name><surname>Plakkat</surname> <given-names>U.</given-names></name> <name><surname>DiLoreto</surname> <given-names>D. S.</given-names></name> <name><surname>Yellanki</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>The cinnamyl alcohol dehydrogenase gene family in Populus: Phylogeny, organization, and expression.</article-title> <source><italic>BMC Plant Biol.</italic></source> <volume>9</volume>:<issue>26</issue>. <pub-id pub-id-type="doi">10.1186/1471-2229-9-26</pub-id> <pub-id pub-id-type="pmid">19267902</pub-id></citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baumberger</surname> <given-names>N.</given-names></name> <name><surname>Doesseger</surname> <given-names>B.</given-names></name> <name><surname>Guyot</surname> <given-names>R.</given-names></name> <name><surname>Diet</surname> <given-names>A.</given-names></name> <name><surname>Parsons</surname> <given-names>R. L.</given-names></name> <name><surname>Clark</surname> <given-names>M. A.</given-names></name><etal/></person-group> (<year>2003</year>). <article-title>Whole-genome comparison of leucine-rich repeat extensins in <italic>Arabidopsis</italic> and rice. A conserved family of cell wall proteins form a vegetative and a reproductive clade.</article-title> <source><italic>Plant Physiol.</italic></source> <volume>131</volume> <fpage>1313</fpage>&#x2013;<lpage>1326</lpage>. <pub-id pub-id-type="doi">10.1104/pp.102.014928</pub-id> <pub-id pub-id-type="pmid">12644681</pub-id></citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cannon</surname> <given-names>S. B.</given-names></name> <name><surname>Mitra</surname> <given-names>A.</given-names></name> <name><surname>Baumgarten</surname> <given-names>A.</given-names></name> <name><surname>Young</surname> <given-names>N. D.</given-names></name> <name><surname>May</surname> <given-names>G.</given-names></name></person-group> (<year>2004</year>). <article-title>The roles of segmental and tandem gene duplication in the evolution of large gene families in <italic>Arabidopsis thaliana</italic>.</article-title> <source><italic>BMC Plant Biol.</italic></source> <volume>4</volume>:<issue>10</issue>. <pub-id pub-id-type="doi">10.1186/1471-2229-4-10</pub-id> <pub-id pub-id-type="pmid">15171794</pub-id></citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chothia</surname> <given-names>C.</given-names></name> <name><surname>Gough</surname> <given-names>J.</given-names></name> <name><surname>Vogel</surname> <given-names>C.</given-names></name> <name><surname>Teichmann</surname> <given-names>S. A.</given-names></name></person-group> (<year>2003</year>). <article-title>Evolution of the protein repertoire.</article-title> <source><italic>Science</italic></source> <volume>300</volume> <fpage>1701</fpage>&#x2013;<lpage>1703</lpage>.</citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Conant</surname> <given-names>G. C.</given-names></name> <name><surname>Wolfe</surname> <given-names>K. H.</given-names></name></person-group> (<year>2008</year>). <article-title>Turning a hobby into a job: How duplicated genes find new functions.</article-title> <source><italic>Nat. Rev. Genet.</italic></source> <volume>9</volume> <fpage>938</fpage>&#x2013;<lpage>950</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2482</pub-id> <pub-id pub-id-type="pmid">19015656</pub-id></citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cvitanich</surname> <given-names>C.</given-names></name> <name><surname>Pallisgaard</surname> <given-names>N.</given-names></name> <name><surname>Nielsen</surname> <given-names>K. A.</given-names></name> <name><surname>Hansen</surname> <given-names>A. C.</given-names></name> <name><surname>Larsen</surname> <given-names>K.</given-names></name> <name><surname>Pihakaski-Maunsbach</surname> <given-names>K.</given-names></name><etal/></person-group> (<year>2000</year>). <article-title>CPP1, a DNA-binding protein involved in the expression of a soybean leghemoglobin c3 gene.</article-title> <source><italic>Proc. Natl. Acad. Sci. U.S.A.</italic></source> <volume>97</volume> <fpage>8163</fpage>&#x2013;<lpage>8168</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.090468497</pub-id> <pub-id pub-id-type="pmid">10859345</pub-id></citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fankhauser</surname> <given-names>C.</given-names></name> <name><surname>Chory</surname> <given-names>J.</given-names></name></person-group> (<year>1997</year>). <article-title>Light control of plant development.</article-title> <source><italic>Annu. Rev. Cell Dev. Biol.</italic></source> <volume>13</volume> <fpage>203</fpage>&#x2013;<lpage>229</lpage>.</citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fehr</surname> <given-names>W. R.</given-names></name> <name><surname>Caviness</surname> <given-names>C. E.</given-names></name></person-group> (<year>1977</year>). <article-title>Stages of soybean development.</article-title> <source><italic>Ames</italic></source> <volume>80</volume>:<issue>11</issue>.</citation></ref>
<ref id="B10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Green</surname> <given-names>P. J.</given-names></name> <name><surname>Kay</surname> <given-names>S. A.</given-names></name> <name><surname>Chua</surname> <given-names>N. H.</given-names></name></person-group> (<year>1987</year>). <article-title>Sequence-specific interactions of a pea nuclear factor with light responsive elements upstream of the rbcS-3A gene.</article-title> <source><italic>EMBO J.</italic></source> <volume>6</volume> <fpage>2543</fpage>&#x2013;<lpage>2549</lpage>.</citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hauser</surname> <given-names>B. A.</given-names></name> <name><surname>He</surname> <given-names>J. Q.</given-names></name> <name><surname>Park</surname> <given-names>S. O.</given-names></name> <name><surname>Gasser</surname> <given-names>C. S.</given-names></name></person-group> (<year>2000</year>). <article-title>TSO1 is a novel protein that modulates cytokinesis and cell expansion in <italic>Arabidopsis</italic>.</article-title> <source><italic>Development</italic></source> <volume>127</volume> <fpage>2219</fpage>&#x2013;<lpage>2226</lpage>. <pub-id pub-id-type="doi">10.1242/dev.127.10.2219</pub-id> <pub-id pub-id-type="pmid">10769245</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hauser</surname> <given-names>B. A.</given-names></name> <name><surname>Villanueva</surname> <given-names>J. M.</given-names></name> <name><surname>Gasser</surname> <given-names>C. S.</given-names></name></person-group> (<year>1998</year>). <article-title><italic>Arabidopsis TSO1</italic> regulates directional processes in cells during floral organogenesis.</article-title> <source><italic>Genetics</italic></source> <volume>150</volume> <fpage>411</fpage>&#x2013;<lpage>423</lpage>. <pub-id pub-id-type="doi">10.1093/genetics/150.1.411</pub-id> <pub-id pub-id-type="pmid">9725857</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>H.</given-names></name> <name><surname>Dong</surname> <given-names>Q.</given-names></name> <name><surname>Shao</surname> <given-names>Y.</given-names></name> <name><surname>Jiang</surname> <given-names>H.</given-names></name> <name><surname>Zhu</surname> <given-names>S.</given-names></name> <name><surname>Cheng</surname> <given-names>B.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>Genome-wide survey and characterization of the WRKY gene family in <italic>Populus trichocarpa</italic>.</article-title> <source><italic>Plant Cell Rep.</italic></source> <volume>31</volume> <fpage>1199</fpage>&#x2013;<lpage>1217</lpage>. <pub-id pub-id-type="doi">10.1007/s00299-012-1241-0</pub-id> <pub-id pub-id-type="pmid">22371255</pub-id></citation></ref>
<ref id="B14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Irshad</surname> <given-names>A.</given-names></name> <name><surname>Guo</surname> <given-names>H.</given-names></name> <name><surname>Rehman</surname> <given-names>S. U.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Gu</surname> <given-names>Y.</given-names></name> <name><surname>Xiong</surname> <given-names>H.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Identification of single nucleotide polymorphism in TaSBEIII and development of KASP Marker associated with grain weight in wheat.</article-title> <source><italic>Front. Genet.</italic></source> <volume>12</volume>:<issue>697294</issue>. <pub-id pub-id-type="doi">10.3389/fgene.2021.697294</pub-id> <pub-id pub-id-type="pmid">34306037</pub-id></citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Irshad</surname> <given-names>A.</given-names></name> <name><surname>Guo</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>S.</given-names></name> <name><surname>Gu</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>L.</given-names></name> <name><surname>Xie</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>EcoTILLING reveals natural allelic variations in starch synthesis key gene TaSSIV and its haplotypes associated with higher thousand grain weight.</article-title> <source><italic>Genes</italic></source> <volume>10</volume>:<issue>307</issue>. <pub-id pub-id-type="doi">10.3390/genes10040307</pub-id> <pub-id pub-id-type="pmid">31003564</pub-id></citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krzywinski</surname> <given-names>M.</given-names></name> <name><surname>Schein</surname> <given-names>J.</given-names></name> <name><surname>Birol</surname> <given-names>I.</given-names></name> <name><surname>Connors</surname> <given-names>J.</given-names></name> <name><surname>Gascoyne</surname> <given-names>R.</given-names></name> <name><surname>Horsman</surname> <given-names>D.</given-names></name><etal/></person-group> (<year>2009</year>). <article-title>Circos: An information aesthetic for comparative genomics.</article-title> <source><italic>Genome Res.</italic></source> <volume>19</volume> <fpage>1639</fpage>&#x2013;<lpage>1645</lpage>. <pub-id pub-id-type="doi">10.1101/gr.092759.109</pub-id> <pub-id pub-id-type="pmid">19541911</pub-id></citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kumar</surname> <given-names>S.</given-names></name> <name><surname>Stecher</surname> <given-names>G.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Knyaz</surname> <given-names>C.</given-names></name> <name><surname>Tamura</surname> <given-names>K.</given-names></name></person-group> (<year>2018</year>). <article-title>MEGA X: Molecular evolutionary genetics analysis across computing platforms.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>35</volume> <fpage>1547</fpage>&#x2013;<lpage>1549</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msy096</pub-id> <pub-id pub-id-type="pmid">29722887</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lecharny</surname> <given-names>A.</given-names></name> <name><surname>Boudet</surname> <given-names>N.</given-names></name> <name><surname>Gy</surname> <given-names>I.</given-names></name> <name><surname>Aubourg</surname> <given-names>S.</given-names></name> <name><surname>Kreis</surname> <given-names>M.</given-names></name></person-group> (<year>2003</year>). <article-title>Introns in, introns out in plant gene families: A genomic approach of the dynamics of gene structure.</article-title> <source><italic>J. Struct. Funct. Genom.</italic></source> <volume>3</volume> <fpage>111</fpage>&#x2013;<lpage>116</lpage>. <pub-id pub-id-type="pmid">12836690</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lescot</surname> <given-names>M.</given-names></name> <name><surname>Dehais</surname> <given-names>P.</given-names></name> <name><surname>Thijs</surname> <given-names>G.</given-names></name> <name><surname>Marchal</surname> <given-names>K.</given-names></name> <name><surname>Moreau</surname> <given-names>Y.</given-names></name> <name><surname>Van de Peer</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2002</year>). <article-title>PlantCARE, a database of plant cis-acting regulatory elements and a portal to tools for in silico analysis of promoter sequences.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>30</volume> <fpage>325</fpage>&#x2013;<lpage>327</lpage>. <pub-id pub-id-type="doi">10.1093/nar/30.1.325</pub-id> <pub-id pub-id-type="pmid">11752327</pub-id></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y. H.</given-names></name> <name><surname>Zhou</surname> <given-names>G.</given-names></name> <name><surname>Ma</surname> <given-names>J.</given-names></name> <name><surname>Jiang</surname> <given-names>W.</given-names></name> <name><surname>Jin</surname> <given-names>L. G.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>De novo assembly of soybean wild relatives for pan-genome analysis of diversity and agronomic traits.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>32</volume> <fpage>1045</fpage>&#x2013;<lpage>1052</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.2979</pub-id> <pub-id pub-id-type="pmid">25218520</pub-id></citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>T.</given-names></name> <name><surname>Dou</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name></person-group> (<year>2013</year>). <article-title>Fuzzy clustering of CPP family in plants with evolution and interaction analyses.</article-title> <source><italic>BMC Bioinform.</italic></source> <volume>14</volume>:<issue>S10</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-14-S13-S10</pub-id> <pub-id pub-id-type="pmid">24268301</pub-id></citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Majeed</surname> <given-names>U.</given-names></name> <name><surname>Darwish</surname> <given-names>E.</given-names></name> <name><surname>Rehman</surname> <given-names>S. U.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name></person-group> (<year>2018</year>). <article-title>Kompetitive allele specific PCR (KASP) a singleplex genotyping platform and its application.</article-title> <source><italic>J. Agric. Sci.</italic></source> <volume>11</volume> <fpage>11</fpage>&#x2013;<lpage>20</lpage>.</citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nan</surname> <given-names>H.</given-names></name> <name><surname>Lin</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Gao</surname> <given-names>L.</given-names></name></person-group> (<year>2021</year>). <article-title>Comprehensive genomic analysis and expression profiling of Cysteine-rich Polycomb-like transcription factor gene family in tea tree.</article-title> <source><italic>Hortic. Plant J.</italic></source> <volume>7</volume> <fpage>469</fpage>&#x2013;<lpage>478</lpage>.</citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Neelam</surname> <given-names>K.</given-names></name> <name><surname>Brown-Guedira</surname> <given-names>G.</given-names></name> <name><surname>Huang</surname> <given-names>L.</given-names></name></person-group> (<year>2013</year>). <article-title>Development and validation of a breeder-friendly KASPar marker for wheat leaf rust resistance locus Lr21.</article-title> <source><italic>Mol. Breed.</italic></source> <volume>31</volume> <fpage>233</fpage>&#x2013;<lpage>237</lpage>.</citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qanmber</surname> <given-names>G.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Yu</surname> <given-names>D.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Lu</surname> <given-names>L.</given-names></name> <name><surname>Mo</surname> <given-names>H.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Genome-wide identification and characterization of the PERK gene family in <italic>Gossypium hirsutum</italic> reveals gene duplication and functional divergence.</article-title> <source><italic>Int. J. Mol. Sci.</italic></source> <volume>20</volume>:<issue>1750</issue>. <pub-id pub-id-type="doi">10.3390/ijms20071750</pub-id> <pub-id pub-id-type="pmid">30970629</pub-id></citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ramsey</surname> <given-names>J.</given-names></name> <name><surname>Schemske</surname> <given-names>D. W.</given-names></name></person-group> (<year>1998</year>). <article-title>Pathways, mechanisms, and rates of polyploid formation in flowering plants.</article-title> <source><italic>Annu. Rev. Ecol. Evol. Syst.</italic></source> <volume>29</volume> <fpage>467</fpage>&#x2013;<lpage>501</lpage>.</citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rasheed</surname> <given-names>A.</given-names></name> <name><surname>Hao</surname> <given-names>Y.</given-names></name> <name><surname>Xia</surname> <given-names>X.</given-names></name> <name><surname>Khan</surname> <given-names>A.</given-names></name> <name><surname>Xu</surname> <given-names>Y.</given-names></name> <name><surname>Varshney</surname> <given-names>R. K.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Crop breeding chips and genotyping platforms: Progress, challenges, and perspectives.</article-title> <source><italic>Mol. Plant</italic></source> <volume>10</volume> <fpage>1047</fpage>&#x2013;<lpage>1064</lpage>. <pub-id pub-id-type="doi">10.1016/j.molp.2017.06.008</pub-id> <pub-id pub-id-type="pmid">28669791</pub-id></citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rasheed</surname> <given-names>A.</given-names></name> <name><surname>Wen</surname> <given-names>W.</given-names></name> <name><surname>Gao</surname> <given-names>F.</given-names></name> <name><surname>Zhai</surname> <given-names>S.</given-names></name> <name><surname>Jin</surname> <given-names>H.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Development and validation of KASP assays for genes underpinning key economic traits in bread wheat.</article-title> <source><italic>Theor. Appl. Genet.</italic></source> <volume>129</volume> <fpage>1843</fpage>&#x2013;<lpage>1860</lpage>. <pub-id pub-id-type="doi">10.1007/s00122-016-2743-x</pub-id> <pub-id pub-id-type="pmid">27306516</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Riechmann</surname> <given-names>J. L.</given-names></name> <name><surname>Heard</surname> <given-names>J.</given-names></name> <name><surname>Martin</surname> <given-names>G.</given-names></name> <name><surname>Reuber</surname> <given-names>L.</given-names></name> <name><surname>Jiang</surname> <given-names>C. Z.</given-names></name> <name><surname>Keddie</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2000</year>). <article-title><italic>Arabidopsis</italic> transcription factors: Genome-wide comparative analysis among eukaryotes.</article-title> <source><italic>Science</italic></source> <volume>290</volume> <fpage>2105</fpage>&#x2013;<lpage>2110</lpage>. <pub-id pub-id-type="doi">10.1126/science.290.5499.2105</pub-id> <pub-id pub-id-type="pmid">11118137</pub-id></citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rogozin</surname> <given-names>I. B.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Sorokin</surname> <given-names>A. V.</given-names></name> <name><surname>Mirkin</surname> <given-names>B. G.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2003</year>). <article-title>Remarkable interkingdom conservation of intron positions and massive, lineage-specific intron loss and gain in eukaryotic evolution.</article-title> <source><italic>Curr. Biol.</italic></source> <volume>13</volume> <fpage>1512</fpage>&#x2013;<lpage>1517</lpage>. <pub-id pub-id-type="doi">10.1016/s0960-9822(03)00558-x</pub-id> <pub-id pub-id-type="pmid">12956953</pub-id></citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Roy</surname> <given-names>S. W.</given-names></name> <name><surname>Penny</surname> <given-names>D.</given-names></name></person-group> (<year>2007</year>). <article-title>Patterns of intron loss and gain in plants: Intron loss&#x2013;dominated evolution and genome-wide comparison of <italic>O. sativa</italic> and <italic>A. thaliana</italic>.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>24</volume> <fpage>171</fpage>&#x2013;<lpage>181</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msl159</pub-id> <pub-id pub-id-type="pmid">17065597</pub-id></citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schauser</surname> <given-names>L.</given-names></name> <name><surname>Wieloch</surname> <given-names>W.</given-names></name> <name><surname>Stougaard</surname> <given-names>J.</given-names></name></person-group> (<year>2005</year>). <article-title>Evolution of NIN-like proteins in <italic>Arabidopsis</italic>, rice, and <italic>Lotus japonicus</italic>.</article-title> <source><italic>J. Mol. Evol.</italic></source> <volume>60</volume> <fpage>229</fpage>&#x2013;<lpage>237</lpage>.</citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schmutz</surname> <given-names>J.</given-names></name> <name><surname>Cannon</surname> <given-names>S. B.</given-names></name> <name><surname>Schlueter</surname> <given-names>J.</given-names></name> <name><surname>Ma</surname> <given-names>J.</given-names></name> <name><surname>Mitros</surname> <given-names>T.</given-names></name> <name><surname>Nelson</surname> <given-names>W.</given-names></name><etal/></person-group> (<year>2010</year>). <article-title>Genome sequence of the paleopolyploid soybean.</article-title> <source><italic>Nature</italic></source> <volume>463</volume> <fpage>178</fpage>&#x2013;<lpage>183</lpage>.</citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Severin</surname> <given-names>A. J.</given-names></name> <name><surname>Woody</surname> <given-names>J. L.</given-names></name> <name><surname>Bolo</surname> <given-names>Y. T.</given-names></name> <name><surname>Joseph</surname> <given-names>B.</given-names></name> <name><surname>Diers</surname> <given-names>B. W.</given-names></name> <name><surname>Farmer</surname> <given-names>A. D.</given-names></name><etal/></person-group> (<year>2010</year>). <article-title>RNA-Seq Atlas of <italic>Glycine max</italic>: A guide to soybean transcriptome.</article-title> <source><italic>BMC Plant Biol.</italic></source> <volume>10</volume>:<issue>160</issue>. <pub-id pub-id-type="doi">10.1186/1471-2229-10-160</pub-id> <pub-id pub-id-type="pmid">20687943</pub-id></citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sijacic</surname> <given-names>P.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name></person-group> (<year>2011</year>). <article-title>Recessive antimorphic alleles overcome functionally redundant loci to reveal TSO1 function in <italic>Arabidopsis</italic> flowers and meristems.</article-title> <source><italic>PLoS Genet.</italic></source> <volume>7</volume>:<issue>e1002352</issue>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1002352</pub-id> <pub-id pub-id-type="pmid">22072982</pub-id></citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>J. Y.</given-names></name> <name><surname>Leung</surname> <given-names>T.</given-names></name> <name><surname>Ehler</surname> <given-names>L. K.</given-names></name> <name><surname>Wang</surname> <given-names>C. Z.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name></person-group> (<year>2000</year>). <article-title>Regulation of meristem organization and cell division by TSO1, an <italic>Arabidopsis</italic> gene with cysteine-rich repeats.</article-title> <source><italic>Development</italic></source> <volume>127</volume> <fpage>2207</fpage>&#x2013;<lpage>2217</lpage>. <pub-id pub-id-type="doi">10.1242/dev.127.10.2207</pub-id> <pub-id pub-id-type="pmid">10769244</pub-id></citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>L.</given-names></name> <name><surname>Nguyen</surname> <given-names>N.</given-names></name> <name><surname>Deshmukh</surname> <given-names>R. K.</given-names></name> <name><surname>Patil</surname> <given-names>G. B.</given-names></name> <name><surname>Prince</surname> <given-names>S. J.</given-names></name> <name><surname>Valliyodan</surname> <given-names>B.</given-names></name><etal/></person-group> (<year>2016a</year>). <article-title>Soybean TIP gene family analysis and characterization of GmTIP1;5 and GmTIP2;5 water transport activity.</article-title> <source><italic>Front. Plant Sci.</italic></source> <volume>7</volume>:<issue>1564</issue>. <pub-id pub-id-type="doi">10.3389/fpls.2016.01564</pub-id> <pub-id pub-id-type="pmid">27818669</pub-id></citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Wu</surname> <given-names>F.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name></person-group> (<year>2016b</year>). <article-title>Genome-wide analysis of the maize (<italic>Zea may</italic> L.) CPP-like gene family and expression profiling under abiotic stress.</article-title> <source><italic>Genet. Mol. Res.</italic></source> <volume>15</volume>:<comment>gmr.15038023</comment>. <pub-id pub-id-type="doi">10.4238/gmr.15038023</pub-id> <pub-id pub-id-type="pmid">27525875</pub-id></citation></ref>
<ref id="B39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Suyama</surname> <given-names>M.</given-names></name> <name><surname>Torrents</surname> <given-names>D.</given-names></name> <name><surname>Bork</surname> <given-names>P.</given-names></name></person-group> (<year>2006</year>). <article-title>PAL2NAL: Robust conversion of protein sequence alignments into the corresponding codon alignments.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>34</volume> <fpage>W609</fpage>&#x2013;<lpage>W612</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkl315</pub-id> <pub-id pub-id-type="pmid">16845082</pub-id></citation></ref>
<ref id="B40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tamura</surname> <given-names>K.</given-names></name> <name><surname>Peterson</surname> <given-names>D.</given-names></name> <name><surname>Peterson</surname> <given-names>N.</given-names></name> <name><surname>Stecher</surname> <given-names>G.</given-names></name> <name><surname>Nei</surname> <given-names>M.</given-names></name> <name><surname>Kumar</surname> <given-names>S.</given-names></name></person-group> (<year>2011</year>). <article-title>MEGA5: Molecular evolutionary genetics analysis using maximum likelihood, evolutionary distance, and maximum parsimony methods.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>28</volume> <fpage>2731</fpage>&#x2013;<lpage>2739</lpage>.</citation></ref>
<ref id="B41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ur Rehman</surname> <given-names>S.</given-names></name> <name><surname>Sher</surname> <given-names>M. A.</given-names></name> <name><surname>Saddique</surname> <given-names>M. A. B.</given-names></name> <name><surname>Ali</surname> <given-names>Z.</given-names></name> <name><surname>Khan</surname> <given-names>M. A.</given-names></name> <name><surname>Mao</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Development and exploitation of KASP assays for genes underpinning drought tolerance among wheat cultivars from Pakistan.</article-title> <source><italic>Front. Genet.</italic></source> <volume>12</volume>:<issue>684702</issue>. <pub-id pub-id-type="doi">10.3389/fgene.2021.684702</pub-id> <pub-id pub-id-type="pmid">34178041</pub-id></citation></ref>
<ref id="B42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ur Rehman</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Chang</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Mao</surname> <given-names>X.</given-names></name> <name><surname>Jing</surname> <given-names>R.</given-names></name></person-group> (<year>2019</year>). <article-title>A wheat protein kinase gene TaSnRK2.9-5A associated with yield contributing traits.</article-title> <source><italic>Theor. Appl. Genet.</italic></source> <volume>132</volume> <fpage>907</fpage>&#x2013;<lpage>919</lpage>. <pub-id pub-id-type="doi">10.1007/s00122-018-3247-7</pub-id> <pub-id pub-id-type="pmid">30519711</pub-id></citation></ref>
<ref id="B43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Guo</surname> <given-names>Y.</given-names></name> <name><surname>Wu</surname> <given-names>C.</given-names></name> <name><surname>Yang</surname> <given-names>G.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Zheng</surname> <given-names>C.</given-names></name></person-group> (<year>2008</year>). <article-title>Genome-wide analysis of CCCH zinc finger family in <italic>Arabidopsis</italic> and rice.</article-title> <source><italic>BMC Genom.</italic></source> <volume>9</volume>:<issue>44</issue>. <pub-id pub-id-type="doi">10.1186/1471-2164-9-44</pub-id> <pub-id pub-id-type="pmid">18221561</pub-id></citation></ref>
<ref id="B44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Xu</surname> <given-names>X.</given-names></name> <name><surname>Mao</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>Phylogenetic, expression, and bioinformatic analysis of the ABC1 gene family in <italic>Populus trichocarpa</italic>.</article-title> <source><italic>Sci. World J.</italic></source> <volume>2013</volume>:<issue>785070</issue>. <pub-id pub-id-type="doi">10.1155/2013/785070</pub-id> <pub-id pub-id-type="pmid">24163630</pub-id></citation></ref>
<ref id="B45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>William Roy</surname> <given-names>S.</given-names></name> <name><surname>Gilbert</surname> <given-names>W.</given-names></name></person-group> (<year>2006</year>). <article-title>The evolution of spliceosomal introns: Patterns, puzzles and progress.</article-title> <source><italic>Nat. Rev. Genet.</italic></source> <volume>7</volume> <fpage>211</fpage>&#x2013;<lpage>221</lpage>. <pub-id pub-id-type="doi">10.1038/nrg1807</pub-id> <pub-id pub-id-type="pmid">16485020</pub-id></citation></ref>
<ref id="B46"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Wilson</surname> <given-names>R. F.</given-names></name></person-group> (<year>2004</year>). &#x201C;<article-title>Seed composition</article-title>&#x201D;, in <source><italic>Soybeans: Improvement, Production, and Uses</italic></source>, <role>eds</role> <person-group person-group-type="editor"><name><surname>Shibles</surname> <given-names>R. M.</given-names></name> <name><surname>Harper</surname> <given-names>J. E.</given-names></name> <name><surname>Wilson</surname> <given-names>R. F.</given-names></name> <name><surname>Shoemaker</surname> <given-names>R. C.</given-names></name></person-group> (<publisher-loc>Madison, WI</publisher-loc>: <publisher-name>American Society of Agronomy</publisher-name>), <fpage>621</fpage>&#x2013;<lpage>677</lpage>.</citation></ref>
<ref id="B47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>S.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Yue</surname> <given-names>J. X.</given-names></name> <name><surname>Tian</surname> <given-names>D.</given-names></name> <name><surname>Chen</surname> <given-names>J. Q.</given-names></name></person-group> (<year>2008</year>). <article-title>Recent duplications dominate NBS-encoding gene expansion in two woody species.</article-title> <source><italic>Mol. Genet. Genom.</italic></source> <volume>280</volume> <fpage>187</fpage>&#x2013;<lpage>198</lpage>. <pub-id pub-id-type="doi">10.1007/s00438-008-0355-0</pub-id> <pub-id pub-id-type="pmid">18563445</pub-id></citation></ref>
<ref id="B48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Ahammed</surname> <given-names>G. J.</given-names></name> <name><surname>Wan</surname> <given-names>C.</given-names></name> <name><surname>Liu</surname> <given-names>H. R.</given-names></name> <name><surname>Chen</surname> <given-names>R.</given-names></name> <name><surname>Zhou</surname> <given-names>Y.</given-names></name></person-group> (<year>2019</year>). <article-title>Comprehensive analysis of TIFY transcription factors and their expression profiles under jasmonic acid and abiotic stresses in watermelon.</article-title> <source><italic>Int. J. Genom.</italic></source> <volume>2019</volume>:<issue>6813086</issue>. <pub-id pub-id-type="doi">10.1155/2019/6813086</pub-id> <pub-id pub-id-type="pmid">31662958</pub-id></citation></ref>
<ref id="B49"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z. E.</given-names></name> <name><surname>Gong</surname> <given-names>Q.</given-names></name> <name><surname>Qin</surname> <given-names>W. Q.</given-names></name> <name><surname>Yang</surname> <given-names>Z. R.</given-names></name> <name><surname>Cheng</surname> <given-names>Y.</given-names></name> <name><surname>Lu</surname> <given-names>L. L.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>Genome-wide analysis of WOX genes in upland cotton and their expression pattern under different stresses.</article-title> <source><italic>BMC Plant Biol.</italic></source> <volume>17</volume>:<issue>113</issue>. <pub-id pub-id-type="doi">10.1186/s12870-017-1065-8</pub-id> <pub-id pub-id-type="pmid">28683794</pub-id></citation></ref>
<ref id="B50"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Gu</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Tang</surname> <given-names>Z.</given-names></name> <name><surname>Xu</surname> <given-names>C.</given-names></name></person-group> (<year>2008</year>). <article-title>Molecular evolution of the CPP-like gene family in plants: Insights from comparative genomics of <italic>Arabidopsis</italic> and rice.</article-title> <source><italic>J. Mol. Evol.</italic></source> <volume>67</volume> <fpage>266</fpage>&#x2013;<lpage>277</lpage>. <pub-id pub-id-type="doi">10.1007/s00239-008-9143-z</pub-id> <pub-id pub-id-type="pmid">18696028</pub-id></citation></ref>
<ref id="B51"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Zhao</surname> <given-names>H. K.</given-names></name> <name><surname>Wang</surname> <given-names>Y. M.</given-names></name> <name><surname>Yuan</surname> <given-names>C. P.</given-names></name> <name><surname>Zhang</surname> <given-names>Y. Y.</given-names></name> <name><surname>Li</surname> <given-names>H. Y.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Genome-wide identification and expression analysis of the CPP-like gene family in soybean.</article-title> <source><italic>Genet. Mol. Res.</italic></source> <volume>14</volume> <fpage>1260</fpage>&#x2013;<lpage>1268</lpage>. <pub-id pub-id-type="doi">10.4238/2015.February.13.4</pub-id> <pub-id pub-id-type="pmid">25730064</pub-id></citation></ref>
<ref id="B52"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>Y.</given-names></name> <name><surname>Hu</surname> <given-names>L.</given-names></name> <name><surname>Ye</surname> <given-names>S.</given-names></name> <name><surname>Jiang</surname> <given-names>L.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>Genome-wide identification and characterization of cysteine-rich polycomb-like protein (CPP) family genes in cucumber (<italic>Cucumis sativus</italic>) and their roles in stress responses.</article-title> <source><italic>Biologia</italic></source> <volume>73</volume> <fpage>425</fpage>&#x2013;<lpage>435</lpage>.</citation></ref>
<ref id="B53"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Jiang</surname> <given-names>L.</given-names></name> <name><surname>Hu</surname> <given-names>L.</given-names></name></person-group> (<year>2017</year>). <article-title>CsCAT3, a catalase gene from <italic>Cucumis sativus</italic>, confers resistance to a variety of stresses to <italic>Escherichia coli</italic>.</article-title> <source><italic>Biotechnol. Equip.</italic></source> <volume>31</volume> <fpage>886</fpage>&#x2013;<lpage>896</lpage>.</citation></ref>
</ref-list>
</back>
</article>