<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Microbiol.</journal-id>
<journal-title>Frontiers in Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Microbiol.</abbrev-journal-title>
<issn pub-type="epub">1664-302X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmicb.2016.02081</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Microbiology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Characterization of Uncultured Genome Fragment from Soil Metagenomic Library Exposed Rare Mismatch of Internal Tetranucleotide Frequency</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Liu</surname> <given-names>Yunpeng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/344649/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname> <given-names>Dongqing</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x2020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Nan</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/180587/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname> <given-names>Lin</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/344665/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Cui</surname> <given-names>Zhongli</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/21213/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Shen</surname> <given-names>Qirong</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/294036/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhang</surname> <given-names>Ruifu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/180501/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Key Laboratory of Microbial Resources Collection and Preservation, Ministry of Agriculture, Institute of Agricultural Resources and Regional Planning, Chinese Academy of Agricultural Sciences</institution> <country>Beijing, China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Jiangsu Key Lab and Engineering Center for Solid Organic Waste Utilization, National Engineering Research Center for Organic-Based Fertilizers, Nanjing Agricultural University</institution> <country>Nanjing, China</country></aff>
<aff id="aff3"><sup>3</sup><institution>College of Life Sciences, Nanjing Agricultural University</institution> <country>Nanjing, China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: <italic>Marina G. Kalyuzhanaya, San Diego State University, USA</italic></p></fn>
<fn fn-type="edited-by"><p>Reviewed by: <italic>Youn-Sig Kwak, Gyeongsang National University, South Korea; Juan Antonio Ugalde, Facultad de Medicina, Clinica Alemana Universidad del Desarrollo, Chile</italic></p></fn>
<fn fn-type="corresp" id="fn001"><p>&#x002A;Correspondence: <italic>Ruifu Zhang, <email>zhangruifu@caas.cn</email></italic></p></fn>
<fn fn-type="other" id="fn002"><p><sup>&#x2020;</sup><italic>These authors have contributed equally to this work.</italic></p></fn>
<fn fn-type="other" id="fn003"><p>This article was submitted to Evolutionary and Genomic Microbiology, a section of the journal Frontiers in Microbiology</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>12</month>
<year>2016</year>
</pub-date>
<pub-date pub-type="collection">
<year>2016</year>
</pub-date>
<volume>7</volume>
<elocation-id>2081</elocation-id>
<history>
<date date-type="received">
<day>25</day>
<month>04</month>
<year>2016</year>
</date>
<date date-type="accepted">
<day>08</day>
<month>12</month>
<year>2016</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2016 Liu, Yang, Zhang, Chen, Cui, Shen and Zhang.</copyright-statement>
<copyright-year>2016</copyright-year>
<copyright-holder>Liu, Yang, Zhang, Chen, Cui, Shen and Zhang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) or licensor are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Exploring the genomic information of a specific uncultured soil bacterium is vital to understanding its function in the ecosystem but is still a challenge due to the lack of culture techniques. To examine the genomes of uncultured bacteria, a metagenomic bacterial artificial chromosome library derived from a soil sample was screened for 16S rDNA-containing clones. Five clones (4C6, 5E7, 5G4, 5G12, and 5H7) containing genome fragments of uncultured soil bacteria (with low 16S rDNA similarity to isolated bacteria) were selected for sequencing. Clones 5E7 and 5G4 showed only 82 and 83% 16S rDNA identity, respectively, to known sequences. Phylogenetic analysis of 16S rDNA indicated that 5E7 and 5G4 were potentially from a new class of Chloroflexi. Only one-third of the 5G4 open reading frames have significant hits against HMMER. Internal tetranucleotide frequency analysis indicated that the unknown region of 5G4 was poorly correlated with other parts of the clone, indicating that this section might have been obtained through lateral transfer. It was suggested that this region rich in unknown genes is under fast evolution.</p>
</abstract>
<kwd-group>
<kwd>uncultured soil bacterium</kwd>
<kwd>bacterial artificial chromosome library</kwd>
<kwd>16S rDNA</kwd>
<kwd>genome fragment</kwd>
<kwd>tetranucleotide correlation</kwd>
</kwd-group>
<contract-num rid="cn001">31600088</contract-num>
<contract-num rid="cn002">2016M591297</contract-num>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content></contract-sponsor>
<contract-sponsor id="cn002">China Postdoctoral Science Foundation<named-content content-type="fundref-id">10.13039/501100002858</named-content></contract-sponsor>
<contract-sponsor id="cn003">National Infrastructure for Microbiological Resources<named-content content-type="fundref-id">10.13039/501100007693</named-content></contract-sponsor>
<counts>
<fig-count count="4"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="48"/>
<page-count count="9"/>
<word-count count="0"/>
</counts>
</article-meta>
</front>
<body>
<sec><title>Introduction</title>
<p>Soils are dominated by immensely diverse populations of microorganisms that remain largely unexplored (<xref ref-type="bibr" rid="B38">Torsvik and &#x00D8;vre&#x00E5;s</xref>, <xref ref-type="bibr" rid="B38">2002</xref>). It is estimated that more than 99% of the microorganisms present in natural environments are not readily cultivable with known cultivation techniques, and this situation will not change until new culture technologies are developed (<xref ref-type="bibr" rid="B31">Streit and Schmitz, 2004</xref>; <xref ref-type="bibr" rid="B40">Urich et al., 2008</xref>; <xref ref-type="bibr" rid="B46">Yamada and Sekiguchi, 2009</xref>). To overcome the limitations of cultivation techniques, culture-independent strategies, especially novel molecular techniques, have been developed (<xref ref-type="bibr" rid="B14">Liles et al., 2003</xref>; <xref ref-type="bibr" rid="B48">Zhou et al., 2010</xref>; <xref ref-type="bibr" rid="B17">Lundberg et al., 2012</xref>; <xref ref-type="bibr" rid="B12">Lalande et al., 2013</xref>).</p>
<p>Fingerprinting techniques, including denaturing gradient gel electrophoresis (DGGE) and temperature gradient gel electrophoresis (TGGE), have been successfully used in many diversity studies and shown to be powerful methods for detecting uncultured microbes in soil (<xref ref-type="bibr" rid="B12">Lalande et al., 2013</xref>). However, DGGE/TGGE fails to detect minority populations due to inadequate sensitivity. As DGGE/TGGE is a strategy dependent on polymerase chain reaction (PCR), it only shows information of the amplified sequences and limits its function on analyzing unknown sequences. Gene array is a high-throughput metagenomic tool based on DNA hybridization, which is sensitive enough for the analysis of microbial communities and potential gene functions (<xref ref-type="bibr" rid="B48">Zhou et al., 2010</xref>; <xref ref-type="bibr" rid="B39">Tu et al., 2014</xref>). However, gene array approaches target functional genes but not the genome. Moreover, the identification of unknown genes is difficult for gene array approach due to the dependence on probe hybridization, which is hard to do with unknown sequences. In addition, establishing the connection between microbial diversity and physiological functions, that is, who is doing what, constitutes a fundamental problem (<xref ref-type="bibr" rid="B21">Maron et al., 2008</xref>; <xref ref-type="bibr" rid="B27">Prosser, 2015</xref>). <xref ref-type="bibr" rid="B31">Streit and Schmitz (2004)</xref> declared that metagenomics might be the key to investigating uncultured microbes (<xref ref-type="bibr" rid="B41">Vavourakis et al., 2016</xref>). 
Although the newly developed single-cell sequencing approach provides a method to obtain insights into uncultured microbes efficiently (<xref ref-type="bibr" rid="B28">Rinke et al., 2013</xref>), metagenomic library based method is cost-effective and enables high-throughput identification of organismal communities from small amounts of DNA (<xref ref-type="bibr" rid="B43">Williams et al., 2014</xref>). In addition, it is still an efficient way to isolate novel genes from uncultured soil and marine microbes (<xref ref-type="bibr" rid="B47">Zheng et al., 2013</xref>; <xref ref-type="bibr" rid="B18">Mai et al., 2014</xref>; <xref ref-type="bibr" rid="B26">Peng et al., 2014</xref>). Previous studies have obtained information related to uncultured microbes from different environmental samples by sequencing DNA libraries and investigating heterologous expression (<xref ref-type="bibr" rid="B29">Rondon et al., 2000</xref>; <xref ref-type="bibr" rid="B14">Liles et al., 2003</xref>; <xref ref-type="bibr" rid="B10">Kim et al., 2008</xref>; <xref ref-type="bibr" rid="B1">Albertsen et al., 2013</xref>).</p>
<p>It has been generally agreed that directly cloning large fragments of the genomic DNA from microbes in natural soil provides a strategy for studying the uncultured microbes (<xref ref-type="bibr" rid="B29">Rondon et al., 2000</xref>; <xref ref-type="bibr" rid="B14">Liles et al., 2003</xref>; <xref ref-type="bibr" rid="B22">Massana et al., 2008</xref>; <xref ref-type="bibr" rid="B13">Li et al., 2012</xref>). A bacterial artificial chromosome (BAC) vector with the ability to maintain large DNA fragments stably in <italic>Escherichia coli</italic>, has shown some advantages in metagenomic research (<xref ref-type="bibr" rid="B29">Rondon et al., 2000</xref>; <xref ref-type="bibr" rid="B14">Liles et al., 2003</xref>; <xref ref-type="bibr" rid="B16">Liu et al., 2011</xref>). In a previous study, a metagenomic BAC library derived from microorganisms in red soil was constructed, and the cloning, heterologous expression, and purification of a new endo-&#x03B2;-1,4-glucanase gene, <italic>cel</italic>5G, was achieved (<xref ref-type="bibr" rid="B15">Liu et al., 2010</xref>, <xref ref-type="bibr" rid="B16">2011</xref>).</p>
<p>Red soils are widely distributed in southern China, covering about 2.04 million km<sup>2</sup> in the tropical and subtropical regions (<xref ref-type="bibr" rid="B8">Guangming et al., 2003</xref>; <xref ref-type="bibr" rid="B44">Wilson et al., 2004</xref>). A double cropping system of wheat (<italic>Triticum aestivum</italic> L.) and corn (<italic>Zea mays</italic> L.) is dominant in the upland of this agricultural region (<xref ref-type="bibr" rid="B45">Xu et al., 2003</xref>). Due to some unfavorable properties, such as low pH and deficiencies of phosphorus, calcium, and magnesium, the productivity of these soils is generally low. In the current study, based on restriction fragment length polymorphism (RFLP) and 16S rDNA sequencing, we isolated five clones with inserts from uncultured bacteria from the red soil-derived metagenomic BAC library. Sequencing of the BAC inserts provided a glimpse of the genomes of these five uncultured bacteria together with the 16S rDNA and showed a rare mismatch of internal tetranucleotide frequency in a clone.</p>
</sec>
<sec id="s1" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec><title>Metagenomic BAC Library</title>
<p>The metagenomic library containing 3,024 BAC clones was constructed in a previous study (<xref ref-type="bibr" rid="B15">Liu et al., 2010</xref>, <xref ref-type="bibr" rid="B16">2011</xref>). The DNA sample was from red soil collected at the Yingtan Red Soil Ecological Station (28&#x00B0;15&#x2032;20&#x2032;&#x2032; N, 116&#x00B0;55&#x2032;30&#x2032;&#x2032; E) of the Chinese Academy of Sciences, Jiangxi Province, China. The BAC library was estimated to contain approximately 200 Mb, with an average insert size of 75 kb. The library was stored at &#x2013;80&#x00B0;C in 32 96-well cell culture plates containing 200 &#x03BC;l of Luria-Bertani (LB) medium with 12.5 &#x03BC;g/ml chloramphenicol (Cm) and 30% glycerol in each well.</p>
</sec>
<sec><title>Plasmid Isolation from the BAC Library</title>
<p>All clones were inoculated into new 96-well plates for activation and then the contents of each well were transferred to 3 ml fresh liquid LB medium with 12.5 &#x03BC;g/ml Cm and cultured overnight. The plasmids were extracted following the protocol described by <xref ref-type="bibr" rid="B16">Liu et al. (2011)</xref>, and the residual chromosomal DNA from the plasmid host (<italic>E. coli</italic> DH10B) was digested by plasmid-safe, ATP-dependent DNase (Epicentre Technologies) at 37&#x00B0;C for 2 h to remove the nicked DNA. The reactions were then incubated in a water bath at 70&#x00B0;C for 15 min to inactivate the DNase.</p>
</sec>
<sec><title>Screening of 16S rRNA Gene-Containing BAC Plasmids</title>
<p>To screen the 16S rRNA gene-containing BAC plasmids in the library, the extracted plasmids and the bacterial cells from the library were used to amplify the 16S rDNA fragment in 96-well PCR plates in a 25 &#x03BC;l volume containing 1 &#x03BC;l of DNA or cell suspension as the template, 2.5 &#x03BC;l of 10 &#x00D7; PCR buffer, 2 &#x03BC;l of Mg<sup>2+</sup> (20 mM), 2 &#x03BC;l of 2.5 mM dNTP, 1 &#x03BC;l (10 pmol/&#x03BC;l) of each of the primers (27F, 5&#x2032;-AGAGTTTGATCCTGGCTCAG and 1492R, 5&#x2032;-GGTTACCTTGTTACGACTT), 0.5 &#x03BC;l of Taq polymerase, and 16 &#x03BC;l of ddH<sub>2</sub>O. The primer pair amplified about 1,500 bp of the 16S rDNA. The reaction program included 5 min of denaturation at 95&#x00B0;C, 30 cycles of 95&#x00B0;C for 1 min, 54&#x00B0;C for 90 s, and extension at 72&#x00B0;C for 120 s followed by 10 min of extension at 72&#x00B0;C. The PCR products amplified from the extracted BAC plasmids were detected on 1% agarose gels.</p>
<p>All the PCR reactions using bacterial cells as templates resulted in the amplification of 16S rDNA products of the BAC host, <italic>E. coli</italic> DH10B. To eliminate this background and screen for the 16S rRNA genes contained in the BAC plasmids, the RFLP analysis using endonuclease <italic>HhaI</italic>, which recognizes GCGC sites, was performed for all the PCR products. The digestion was carried out at 37&#x00B0;C for 2 h. The restriction fragments were analyzed on 1% agarose gels, and the 16S rDNA of <italic>E. coli</italic> DH10B was used as the control. The agarose gel was stained with ethidium bromide and analyzed with a digital imaging system. The 16S rDNA products that showed RFLP profiles different from that of the <italic>E. coli</italic> DH10B control were selected and verified by amplification from the corresponding BAC plasmids. In total, 500 BAC clones were screened, and seven 16S rRNA gene-containing BAC plasmids were obtained.</p>
</sec>
<sec><title>16S rRNA Sequencing and Phylogenetic Tree Construction</title>
<p>The confirmed 16S rRNA genes contained in the BAC plasmids were re-amplified with high fidelity polymerase using DNase digested and purified BAC plasmids as templates. The PCR products were purified and linked to the pMD19-T vector (TAKARA, Dalian, China) for sequencing, which was performed by GenScript (Nanjing) Co., Ltd. The sequences obtained were searched against the NCBI nr/nt Database using BLAST. In total, five clones, named 4C6, 5E7, 5G4, 5G12, and 5H7, were found to contain 16S rRNA genes with high dissimilarity to cultured bacteria.</p>
<p>A phylogenetic Maximum Likelihood tree of the 16S rDNA sequences was constructed. 16S rDNA sequences of 20 matched type strains from RDP database were downloaded as references. Both uncultured and isolated strains with 16S rDNA sequences longer than 1,200 bp were included in the sequence match. The duplicates were removed. An outgroup was selected from the RDP Hierarchy Browser. Multiple alignment of the sequences for each BAC clone was conducted by ClustalW. The evolutionary history was inferred by using the Maximum Likelihood method based on the Tamura&#x2013;Nei model (<xref ref-type="bibr" rid="B33">Tamura and Nei, 1993</xref>). Maximum likelihood trees were then constructed using MEGA 6.0 based on 16S rDNA (<xref ref-type="bibr" rid="B34">Tamura et al., 2013</xref>). The phylogeny was tested with 100 bootstrap replications.</p>
</sec>
<sec><title>Sequencing and Annotation of the Selected BAC Inserted Fragments</title>
<p>The inserted fragments of the five selected BAC clones were sequenced using a Roche 454 GS FLX system in the Chinese National Human Genome Center (Shanghai, China). Newbler v2.3 was used to assemble the sequences.</p>
<p>For annotation, the protein-coding genes (CDS) were predicted by Glimmer 3 (<xref ref-type="bibr" rid="B6">Delcher et al., 2007</xref>), and pseudogenes and anomalous start/stop codons were identified by GenePRIMP (<xref ref-type="bibr" rid="B25">Pati et al., 2010</xref>). Then, all of the genes were manually curated with the genome viewer Artemis (<xref ref-type="bibr" rid="B3">Carver et al., 2008</xref>). The functional annotation was carried out using BLASTP with GenBank&#x2019;s non-redundant protein databases (nr) (parameters: <italic>E</italic>-value = 1<italic>e</italic> &#x2013; 5, coverage &#x003E;60%, and identity &#x003E;50%). Each gene was functionally classified into the cluster of orthologous groups (COGs) categories using an RPS-BLAST search against the COGs database with an <italic>E</italic>-value of 1<italic>e</italic> &#x2013; 5 (<xref ref-type="bibr" rid="B35">Tatusov et al., 2003</xref>). The domain recognition was carried out with an HMMER search (<xref ref-type="bibr" rid="B9">Johnson et al., 2010</xref>) against the PFAM database (version 30.0) (<xref ref-type="bibr" rid="B7">Finn et al., 2015</xref>) with an <italic>E</italic>-value of 1<italic>e</italic> &#x2013; 5. The rRNAs were predicted with RNAmmer 1.2 Server (<xref ref-type="bibr" rid="B11">Lagesen et al., 2007</xref>). The annotation was compared with reported soil metagenomes in the Integrated Microbial Genomes (IMG) system based on the function profile (<xref ref-type="bibr" rid="B19">Markowitz et al., 2014a</xref>,<xref ref-type="bibr" rid="B20">b</xref>). The profile of the metagenomes across the functions found in the BAC clones was shown. For each study in IMG, one sample was picked to be included in the comparison. In general, 69 soil metagenomes and 172 functions (pfam) were included.</p>
</sec>
<sec><title>Tetranucleotide Frequency Analysis</title>
<p>The internal tetranucleotide correlations of BAC inserted sequences were analyzed following the compositional method described by Teeling (<xref ref-type="bibr" rid="B36">Teeling et al., 2004a</xref>,<xref ref-type="bibr" rid="B37">b</xref>) using a maximal-order Markov model (<xref ref-type="bibr" rid="B30">Schbath et al., 1995</xref>). Fragments were extended with their reverse complements. The extended sequences were cut into 300 bp fragments from the beginning of the sequences, with a step size of 100 bp. The frequencies of all 256 tetranucleotides and their corresponding expected frequencies were calculated for these sequences. The frequencies were transformed into <italic>z</italic>-scores for each tetranucleotide. The Pearson correlation coefficients for the <italic>z</italic>-scores were calculated. For each insert, the tetranucleotide frequencies of all the fragments were determined. These results were then directly used for the Pearson correlation analysis. All the above processes were performed by Perl script (The script was shared in Github<sup><xref ref-type="fn" rid="fn01">1</xref></sup>), and the visual outputs were finished by R script (heatmap.2).</p>
</sec>
<sec><title>Accession Numbers</title>
<p>Sequences have been deposited in GenBank with accession numbers JX091737, JX091738, JX091739, JX091740, and JX091741 corresponding to 16S rDNA sequences of 4C6, 5E7, 5G4, 5G12, and 5H7; KT342854, KT342855, KT342856, KT342857, and KT342858 corresponding to full length sequences of 4C6, 5E7, 5G4, 5G12, and 5H7, respectively. The IMG IDs for 4C6, 5E7, 5G4, 5G12, and 5H7 are 2695420969, 2695420970, 2695420984, 2695421012, and 2695421011, respectively.</p>
</sec>
</sec>
<sec><title>Results</title>
<sec><title>16S rDNA Analysis and Sequencing of BAC Inserts</title>
<p>The BAC library was estimated to contain approximately 200 Mb, with an average insert size of 75 kb. As a result, seven clones (4C6, 5E7, 5G4, 5G12, 5H7, 10D9, and 27A5) from 500 clones screened were estimated to include 16S rDNA fragments within the inserts. The 27A5 and 10D9 clones, which showed similarities to known species, were clustered with a <italic>Bacillus</italic> spp. group and a <italic>Pseudomonas</italic> spp. group, respectively. Subsequent sequencing of the 16S rDNA confirmed that five clones, 4C6, 5E7, 5G4, 5G12, and 5H7, originated from uncultured bacteria. The identities to the nearest BLAST results from cultivable microbes in the NCBI database were 94% (4C6), 82% (5E7), 83% (5G4), 88% (5G12), and 97% (5H7) (<bold>Table <xref ref-type="table" rid="T1">1</xref></bold>). The 5E7 and 5G4 clones had the lowest similarities to all known 16S rDNA sequences and showed interesting differences from the cultivable microbes in the subsequent analysis. Clones containing ambiguous 16S rDNA were removed in the screening step, which led to a much lower proportion of positive clones in the library than expected.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>General information of five BAC inserts.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left"></td>
<th valign="top" align="center">4C6</th>
<th valign="top" align="center">5E7</th>
<th valign="top" align="center">5G4</th>
<th valign="top" align="center">5G12</th>
<th valign="top" align="center">5H7</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">BAC insert length (bp)</td>
<td valign="top" align="center">23,678</td>
<td valign="top" align="center">46,092</td>
<td valign="top" align="center">30,180</td>
<td valign="top" align="center">43,604</td>
<td valign="top" align="center">56,420</td>
</tr>
<tr>
<td valign="top" align="left">G + C content (%)</td>
<td valign="top" align="center">51.6</td>
<td valign="top" align="center">49.6</td>
<td valign="top" align="center">48.2</td>
<td valign="top" align="center">49.2</td>
<td valign="top" align="center">49.0</td>
</tr>
<tr>
<td valign="top" align="left">No. of predicted ORF</td>
<td valign="top" align="center">18</td>
<td valign="top" align="center">44</td>
<td valign="top" align="center">36</td>
<td valign="top" align="center">42</td>
<td valign="top" align="center">59</td>
</tr>
<tr>
<td valign="top" align="left">No. of hypothetical protein</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">19</td>
<td valign="top" align="center">14</td>
<td valign="top" align="center">14</td>
</tr>
<tr>
<td valign="top" align="left">Proteins assigned to COGs</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">32</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">29</td>
<td valign="top" align="center">39</td>
</tr>
<tr>
<td valign="top" align="left">Average ORF length (bp)</td>
<td valign="top" align="center">1,017</td>
<td valign="top" align="center">981</td>
<td valign="top" align="center">617</td>
<td valign="top" align="center">912</td>
<td valign="top" align="center">788</td>
</tr>
<tr>
<td valign="top" align="left">Coding regions (%)</td>
<td valign="top" align="center">54.4</td>
<td valign="top" align="center">75.6</td>
<td valign="top" align="center">44.4</td>
<td valign="top" align="center">58</td>
<td valign="top" align="center">65</td>
</tr>
<tr>
<td valign="top" align="left">rrn operon</td>
<td valign="top" align="center">16S</td>
<td valign="top" align="center">16S-23S-5.8S</td>
<td valign="top" align="center">16S-23S</td>
<td valign="top" align="center">16S-23S-5.8S</td>
<td valign="top" align="center">16S-23S-5.8S</td>
</tr>
<tr>
<td valign="top" align="left">Nearest relation (accession no.)</td>
<td valign="top" align="center">HQ118747.1</td>
<td valign="top" align="center">EF516466.1</td>
<td valign="top" align="center">FJ479355.1</td>
<td valign="top" align="center">KC555030.1</td>
<td valign="top" align="center">FJ820395.1</td>
</tr>
<tr>
<td valign="top" align="left"></td>
<td valign="top" align="center">Uncultured</td>
<td valign="top" align="center">Uncultured</td>
<td valign="top" align="center">Uncultured</td>
<td valign="top" align="center">Uncultured</td>
<td valign="top" align="center">Uncultured</td>
</tr>
<tr>
<td valign="top" align="left">Identities to cultivable sample<sup>a</sup></td>
<td valign="top" align="center">97% (94%)</td>
<td valign="top" align="center">82% (82%)</td>
<td valign="top" align="center">83% (83%)</td>
<td valign="top" align="center">93% (88%)</td>
<td valign="top" align="center">98% (97%)</td></tr>
</tbody>
</table>
<table-wrap-foot>
<attrib><sup>a</sup><italic>Numbers in brackets indicate 16S rDNA identities to known genomes.</italic></attrib>
</table-wrap-foot>
</table-wrap>
<p>For the investigation of the phylogenetic position of the five uncultured BAC clones within the bacterial domain, a phylogenetic tree based on 16S rRNA gene sequences was constructed (<bold>Figures <xref ref-type="fig" rid="F1">1</xref></bold> and <bold><xref ref-type="fig" rid="F2">2</xref></bold>, Supplementary Figures <xref ref-type="supplementary-material" rid="SM1">S1</xref>&#x2013;<xref ref-type="supplementary-material" rid="SM1">S3</xref>). The phylogenetic tree of the five uncultured clones showed that 4C6 belonged to Mucilaginibacter (Figure S1), 5H7 belonged to the Novosphingobium (Figure S2) and 5G12 was clustered with the Gaiella (Figure S3). 5E7 and 5G4 were clustered with the Chloroflexi; however, both of them differed from known classes (<bold>Figures <xref ref-type="fig" rid="F1">1</xref></bold> and <bold><xref ref-type="fig" rid="F2">2</xref></bold>). Note that the bootstrap values above 5E7 were low because 5E7 showed comparable similarity to <italic>Sphaerobacter thermophilus</italic> (T) DSM20745T and to the <italic>Ktedonobacteria</italic>. Also the analysis using RDP classifier showed 5E7 and 5G4 were from unknown classes of Chloroflexi [5E7: Bacteria (100%) &#x201C;Chloroflexi&#x201D; (96%) Caldilineae (30%) Caldilineales (30%) Caldilineaceae (30%) Litorilinea (30%); 5G4: Root (100%) Bacteria (100%) &#x201C;Chloroflexi&#x201D; (99%) Dehalococcoidia (88%) Dehalococcoidales (88%) Dehalococcoidaceae (88%) Dehalococcoides (88%)] (<xref ref-type="bibr" rid="B42">Wang et al., 2007</xref>). The highest identities of 5E7 and 5G4 to known species were only 82% and 83%, respectively (<bold>Table <xref ref-type="table" rid="T1">1</xref></bold>). To our knowledge, this is the first report of genomic fractions of bacteria with such low identities to known species. 
A similar result, i.e., that cultured and uncultured bacteria differed greatly in a phylogenetic analysis of 16S rRNA genes, has been reported previously (<xref ref-type="bibr" rid="B32">Suzuki et al., 1997</xref>; <xref ref-type="bibr" rid="B4">Cottrell et al., 2000</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p><bold>Molecular phylogenetic analysis of 5E7 by Maximum Likelihood method.</bold> The evolutionary history was inferred by using the Maximum Likelihood method based on the Tamura&#x2013;Nei model. The tree with the highest log likelihood (-10636.1388) is shown. Initial tree(s) for the heuristic search were obtained automatically by applying Neighbor-Join and BioNJ algorithms to a matrix of pairwise distances estimated using the Maximum Composite Likelihood (MCL) approach, and then selecting the topology with superior log likelihood value. The tree is drawn to scale, with branch lengths measured in the number of substitutions per site. The analysis involved 21 nucleotide sequences. All positions containing gaps and missing data were eliminated. There were a total of 1,301 positions in the final dataset. Evolutionary analyses were conducted in MEGA7. The phylogeny was tested with 100 bootstrap replications.</p></caption>
<graphic xlink:href="fmicb-07-02081-g001.tif"/>
</fig>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p><bold>Molecular phylogenetic analysis of 5G4 by Maximum Likelihood method.</bold> The evolutionary history was inferred by using the Maximum Likelihood method based on the Tamura&#x2013;Nei model. The tree with the highest log likelihood (-11781.7674) is shown. Initial tree(s) for the heuristic search were obtained automatically by applying Neighbor-Join and BioNJ algorithms to a matrix of pairwise distances estimated using the MCL approach, and then selecting the topology with superior log likelihood value. The tree is drawn to scale, with branch lengths measured in the number of substitutions per site. The analysis involved 22 nucleotide sequences. All positions containing gaps and missing data were eliminated. There were a total of 1,244 positions in the final dataset. Evolutionary analyses were conducted in MEGA7. The phylogeny was tested with 100 bootstrap replications.</p></caption>
<graphic xlink:href="fmicb-07-02081-g002.tif"/>
</fig>
<p>The inserted fragments of the five uncultured BAC clones (4C6, 5E7, 5G4, 5G12, and 5H7) were completely sequenced. The length of the inserts ranged from 23.7 to 56.4 kbp. The percentages of G + C were 51.6, 49.6, 48.2, 49.2, and 49.0 for 4C6, 5E7, 5G4, 5G12, and 5H7, respectively. For the organization of the rRNA, 5E7, 5G12, and 5H7 all contained a 5.8S-23S-16S operon, whereas 5G4 had a 23S-16S operon, and 4C6 had a single 16S rRNA gene. Annotation of the inserted fragments predicted 18 open reading frames (ORFs) for 4C6, 44 for 5E7, 36 for 5G4, 42 for 5G12, and 59 for 5H7. The annotations have been uploaded to the IMG system with the IDs 2695420969, 2695420970, 2695420984, 2695421012, and 2695421011 for 4C6, 5E7, 5G4, 5G12, and 5H7, respectively. The functions (Pfam) found in these clones were compared with the reported soil metagenomes to show the frequency of these functions in other soil metagenomes (<bold>Supplementary Table <xref ref-type="supplementary-material" rid="SM2">S2</xref></bold>). The number of predicted ORFs assigned to the COG categories was seven for 4C6, 32 for 5E7, 12 for 5G4, 29 for 5G12, and 39 for 5H7 (<bold>Figure <xref ref-type="fig" rid="F3">3</xref></bold>; <bold>Table <xref ref-type="table" rid="T1">1</xref></bold>). The 5E7 clone was rich in genes related to cellular processes and signaling (<bold>Figure <xref ref-type="fig" rid="F3">3</xref></bold>). The distribution of genes in 5G12 and 5H7 was uniform, but 4C6 was too short to give an overview of the trend (<bold>Figure <xref ref-type="fig" rid="F3">3</xref></bold>). The number of hypothetical proteins ranged from 6 to 19 (<bold>Table <xref ref-type="table" rid="T1">1</xref></bold> and Supplementary Table <xref ref-type="supplementary-material" rid="SM1">S1</xref>). 
Interestingly, 5G4 contained 19 hypothetical proteins in 36 predicted ORFs, and only one-third of the ORFs were annotated with known functions (<bold>Table <xref ref-type="table" rid="T1">1</xref></bold> and Supplementary Table <xref ref-type="supplementary-material" rid="SM1">S1</xref>). The proportions of unknown ORFs that could not be assigned to COGs were 61.1%, 21.9%, 66.6%, 30.9%, and 33.8% for 4C6, 5E7, 5G4, 5G12, and 5H7, respectively. Interestingly, 13 unknown genes in 5G4 were clustered together (located between 3 nt and 9,792 nt of the insert). Because 5G4 is from an uncultured bacterium with a phylogenetic relationship distant from all known bacteria (83%), it will be interesting and important to investigate further the functions of these unknown genes. In contrast to 5G4, another clone, 5E7, with 83% identity to known species, had a clear gene annotation, and only 21.9% of the genes could not be assigned to COGs.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p><bold>The open reading frame map.</bold> The annotated ORFs are drawn with different colors of arrows based on the COG classification.</p></caption>
<graphic xlink:href="fmicb-07-02081-g003.tif"/>
</fig>
</sec>
<sec><title>Tetranucleotide Frequency Correlations</title>
<p>To obtain a deeper understanding of the genome, an internal tetranucleotide preference analysis was performed for the five clones. It has been reported that the tetranucleotide frequencies of genomic DNA sequences are highly conserved (<xref ref-type="bibr" rid="B23">Noble et al., 1998</xref>). Tetranucleotide preference is a widely used genome signature to identify genomes (<xref ref-type="bibr" rid="B36">Teeling et al., 2004a</xref>,<xref ref-type="bibr" rid="B37">b</xref>), and it has already been used for metagenomic analysis of fosmid inserts (<xref ref-type="bibr" rid="B13">Li et al., 2012</xref>).</p>
<p>Exogenous sequences contained in the inserted fragments should show low correlations with the local part and could be detected by a heatmap (Supplementary Figure <xref ref-type="supplementary-material" rid="SM1">S4</xref>). Here, we performed the internal tetranucleotide correlation analysis with a newly developed program written in Perl and based on the algorithm reported previously (<xref ref-type="bibr" rid="B36">Teeling et al., 2004a</xref>). Generally, a lighter map indicates an unstable genome with many exogenous sequences. The <italic>R</italic>-values are summarized in a boxplot (<bold>Figure <xref ref-type="fig" rid="F4">4</xref></bold>). The internal tetranucleotide frequency correlation map of 5E7 exhibited overwhelmingly high stability with an <italic>R</italic>-value of 0.59 &#x00B1; 0.18; 5G12 also revealed a high internal correlation of tetranucleotide frequency and a high coding region percentage. In contrast, 5G4 was highly unstable with an average <italic>R</italic>-value of 0.20 &#x00B1; 0.16 (less than 0.6). The region rich in unknown genes was highly unstable (<bold>Figure <xref ref-type="fig" rid="F4">4</xref></bold>; Supplementary Table <xref ref-type="supplementary-material" rid="SM1">S1</xref>). It should be noted that the sections of rRNA were visibly different from the rest of the genome as reported before (<xref ref-type="bibr" rid="B23">Noble et al., 1998</xref>). We suggest that the host of 5G4 was frequently transformed with mobile DNA from other organisms, and the functions of these genes were not known. This hypothesis is consistent with the low coding percentage of 5G4 (<bold>Table <xref ref-type="table" rid="T1">1</xref></bold>). Due to the insertion of exogenous DNA, endogenous genes were inactivated, and as a result, the left region became a non-coding region. In contrast, 5E7 contained a potentially stable genome with less exogenous DNA and a high proportion of coding region. 
In addition, the coding region percentage was highly correlated with the <italic>R</italic>-value of the internal tetranucleotide correlation across all five clones (<italic>R</italic> = 0.895, <italic>p</italic> = 0.039).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p><bold>Internal tetranucleotide frequency correlations of five clones.</bold> Colors indicate the tetranucleotide correlation between each of the 300 bp fragments. Deep blue represents high correlation regions. ORF positions are shown on the top and left sides of each map. The boxplot shows the summary of the correlation matrix.</p></caption>
<graphic xlink:href="fmicb-07-02081-g004.tif"/>
</fig>
</sec>
</sec>
<sec><title>Discussion</title>
<p>In the current study, by using an RFLP method, the metagenomic library containing 3,024 BAC clones from the DNA sample from red soil in South China constructed in a previous study (<xref ref-type="bibr" rid="B16">Liu et al., 2011</xref>) was screened for uncultured bacterial insertions. Finally, two uncultured clones, 5G4 and 5E7, were isolated with low 16S rDNA identities (83 and 82%) to known bacteria. The phylogenetic analysis showed that both of them belong to a new class in Chloroflexi. However, while 5E7 has a clearly annotated and highly self-correlated genome fragment, as expected, 5G4 is rich in unknown genes and has an unstable genome, which suggests frequent lateral gene transfer in this bacterium.</p>
<p>The findings demonstrated our limited knowledge of soil microbes, especially of functional genes in uncultured bacteria (<xref ref-type="bibr" rid="B27">Prosser, 2015</xref>). Some of the uncultured microbial sequences, including 4C6, 5E7, 5G12, and 5H7, are similar to sequences of known species; nevertheless, some of them, such as 5G4, are still beyond our understanding, and many unknown genes are waiting to be identified and classified. Interestingly, 5G4 has an extremely low coding region percentage and is rich in unknown genes, and it has a low internal tetranucleotide correlation, which indicates an unstable genome. Based on these results, we suggest that 5G4 carries many exogenous genes, because the insertion of exogenous DNA into the genome would cause a disruption of local genes, which would lower the proportion of coding region (<bold>Table <xref ref-type="table" rid="T1">1</xref></bold>). It is suggested that the genome of the host bacterium of 5G4 is evolving rapidly, because acquisition of laterally transferred DNA is much more efficient than nucleotide substitution in nature, and the former is the primary driver of bacterial speciation (<xref ref-type="bibr" rid="B24">Ochman and Bergthorsson, 1995</xref>; <xref ref-type="bibr" rid="B2">Bao et al., 2014</xref>). That would explain the low identity of the 16S rDNA sequence of 5G4 to that of known bacteria (<bold>Table <xref ref-type="table" rid="T1">1</xref></bold>; <bold>Figure <xref ref-type="fig" rid="F2">2</xref></bold>). It would be interesting to explore the differences in function, even for the other unknown genes in 5G4.</p>
<p>The strategy developed in this research could be applied to the identification and study of uncultured bacterial genes. Although the genomes of a few uncultured microbes have already been completed, or nearly completed, by single cell sequencing or metagenomic sequencing (<xref ref-type="bibr" rid="B1">Albertsen et al., 2013</xref>), searching for unknown genes from genomes of uncultured microbes was difficult due to the lack of targeted selection of the strains from the microbial mixtures. This strategy provided a deeper view of uncultured bacterial genomes. The 23&#x2013;56 kbp fragments gave a substantial amount of information about the uncultured bacteria, and the fragments were large enough for tetranucleotide analysis to identify the signatures of the genomes. This information allows pre-selection of the genomes of interest, which might be further targets for sequencing. The single cell sequencing approach is efficient in obtaining large draft genomes of uncultured microbes (<xref ref-type="bibr" rid="B5">de Jager and Siezen, 2011</xref>; <xref ref-type="bibr" rid="B28">Rinke et al., 2013</xref>); however, the selection of the microbes to be sequenced is generally based on identification of the marker genes, such as 16S rDNA for bacteria (<xref ref-type="bibr" rid="B5">de Jager and Siezen, 2011</xref>). In the current study, we showed a better way to find interesting or rare genomes from uncultured microbes, which could provide a better perspective for selecting genomes to sequence, e.g., selecting a genome that carries one of the unknown genes in clone 5G4.</p>
<p>In general, these sequences give initial information for understanding the host bacteria, and offer the possibility of purposely hybridizing larger genome fragments of the uncultured soil bacteria of interest.</p>
</sec>
<sec><title>Author Contributions</title>
<p>ZC, QS, and RZ designed the experiment. YL and LC performed the screening of the uncultured clones and the tetranucleotide analysis. DY and NZ performed the genomic analyses and annotations.</p>
</sec>
<sec><title>Conflict of Interest Statement</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</body>
<back>
<fn-group>
<fn fn-type="financial-disclosure">
<p><bold>Funding.</bold> This work was financially supported by the National Natural Science Foundation of China (31600088), China Postdoctoral Science Foundation (2016M591297), National Key Basic Research Program of China (973 program, 2015CB150505), the Fundamental Research Funds for the Central Universities (KYTZ201404) and the National Infrastructure of Microbial Resources (NIRM). RZ and QS were also supported by the Priority Academic Program Development (PAPD) of Jiangsu Higher Education Institutions and the 111 Project (B12009). The funders had no role in study design, data collection and interpretation, or the decision to submit the work for publication.</p></fn>
</fn-group>
<sec sec-type="supplementary material">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="http://journal.frontiersin.org/article/10.3389/fmicb.2016.02081/full#supplementary-material">http://journal.frontiersin.org/article/10.3389/fmicb.2016.02081/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Supplementary_Materials.DOC" id="SM1" mimetype="application/msword" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_2.XLS" id="SM2" mimetype="application/vnd.ms-excel" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>TABLE S2</label>
<caption><p><bold>Frequency of each ORF in metagenomes in IMG.</bold></p></caption>
</supplementary-material>
<supplementary-material xlink:href="Table_2.XLS" id="S2" mimetype="application/vnd.ms-excel" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Albertsen</surname> <given-names>M.</given-names></name> <name><surname>Hugenholtz</surname> <given-names>P.</given-names></name> <name><surname>Skarshewski</surname> <given-names>A.</given-names></name> <name><surname>Nielsen</surname> <given-names>K. L.</given-names></name> <name><surname>Tyson</surname> <given-names>G. W.</given-names></name> <name><surname>Nielsen</surname> <given-names>P. H.</given-names></name></person-group> (<year>2013</year>). <article-title>Genome sequences of rare, uncultured bacteria obtained by differential coverage binning of multiple metagenomes.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>31</volume> <fpage>533</fpage>&#x2013;<lpage>538</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.2579</pub-id></citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bao</surname> <given-names>H.-X.</given-names></name> <name><surname>Tang</surname> <given-names>L.</given-names></name> <name><surname>Yu</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>X.-Y.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Deng</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Differential efficiency in exogenous DNA acquisition among closely related <italic>Salmonella</italic> strains: implications in bacterial speciation.</article-title> <source><italic>BMC Microbiol.</italic></source> <volume>14</volume>:<issue>157</issue>. <pub-id pub-id-type="doi">10.1186/1471-2180-14-157</pub-id></citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Carver</surname> <given-names>T.</given-names></name> <name><surname>Berriman</surname> <given-names>M.</given-names></name> <name><surname>Tivey</surname> <given-names>A.</given-names></name> <name><surname>Patel</surname> <given-names>C.</given-names></name> <name><surname>B&#x00F6;hme</surname> <given-names>U.</given-names></name> <name><surname>Barrell</surname> <given-names>B. G.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>Artemis and ACT: viewing, annotating and comparing sequences stored in a relational database.</article-title> <source><italic>Bioinformatics</italic></source> <volume>24</volume> <fpage>2672</fpage>&#x2013;<lpage>2676</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btn529</pub-id></citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cottrell</surname> <given-names>M. T.</given-names></name> <name><surname>Wood</surname> <given-names>D. N.</given-names></name> <name><surname>Yu</surname> <given-names>L.</given-names></name> <name><surname>Kirchman</surname> <given-names>D. L.</given-names></name></person-group> (<year>2000</year>). <article-title>Selected chitinase genes in cultured and uncultured marine bacteria in the alpha- and gamma-subclasses of the proteobacteria.</article-title> <source><italic>Appl. Environ. Microbiol.</italic></source> <volume>66</volume> <fpage>1195</fpage>&#x2013;<lpage>1201</lpage>. <pub-id pub-id-type="doi">10.1128/AEM.66.3.1195-1201.2000</pub-id></citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>de Jager</surname> <given-names>V.</given-names></name> <name><surname>Siezen</surname> <given-names>R. J.</given-names></name></person-group> (<year>2011</year>). <article-title>Single-cell genomics: unravelling the genomes of unculturable microorganisms.</article-title> <source><italic>Microb. Biotechnol.</italic></source> <volume>4</volume> <fpage>431</fpage>&#x2013;<lpage>437</lpage>. <pub-id pub-id-type="doi">10.1111/j.1751-7915.2011.00271.x</pub-id></citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Delcher</surname> <given-names>A. L.</given-names></name> <name><surname>Bratke</surname> <given-names>K. A.</given-names></name> <name><surname>Powers</surname> <given-names>E. C.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2007</year>). <article-title>Identifying bacterial genes and endosymbiont DNA with Glimmer.</article-title> <source><italic>Bioinformatics</italic></source> <volume>23</volume> <fpage>673</fpage>&#x2013;<lpage>679</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btm009</pub-id></citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Finn</surname> <given-names>R. D.</given-names></name> <name><surname>Coggill</surname> <given-names>P.</given-names></name> <name><surname>Eberhardt</surname> <given-names>R. Y.</given-names></name> <name><surname>Eddy</surname> <given-names>S. R.</given-names></name> <name><surname>Mistry</surname> <given-names>J.</given-names></name> <name><surname>Mitchell</surname> <given-names>A. L.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>The Pfam protein families database: towards a more sustainable future.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>44</volume> <fpage>D279</fpage>&#x2013;<lpage>D285</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv1344</pub-id></citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guangming</surname> <given-names>T.</given-names></name> <name><surname>Feier</surname> <given-names>W.</given-names></name> <name><surname>Yingxu</surname> <given-names>C.</given-names></name> <name><surname>Yunfeng</surname> <given-names>H.</given-names></name> <name><surname>Qinglin</surname> <given-names>F.</given-names></name> <name><surname>Kumar</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2003</year>). <article-title>Effect of different vegetation systems on soil erosion and soil nutrients in red soil region of southeastern China.</article-title> <source><italic>Pedosphere</italic></source> <volume>13</volume> <fpage>121</fpage>&#x2013;<lpage>128</lpage>.</citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>L. S.</given-names></name> <name><surname>Eddy</surname> <given-names>S. R.</given-names></name> <name><surname>Portugaly</surname> <given-names>E.</given-names></name></person-group> (<year>2010</year>). <article-title>Hidden Markov model speed heuristic and iterative HMM search procedure.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>11</volume>:<issue>431</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-11-431</pub-id></citation></ref>
<ref id="B10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>S. J.</given-names></name> <name><surname>Lee</surname> <given-names>C. M.</given-names></name> <name><surname>Han</surname> <given-names>B. R.</given-names></name> <name><surname>Kim</surname> <given-names>M. Y.</given-names></name> <name><surname>Yeo</surname> <given-names>Y. S.</given-names></name> <name><surname>Yoon</surname> <given-names>S. H.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>Characterization of a gene encoding cellulase from uncultured soil bacteria.</article-title> <source><italic>FEMS Microbiol. Lett.</italic></source> <volume>282</volume> <fpage>44</fpage>&#x2013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1111/j.1574-6968.2008.01097.x</pub-id></citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lagesen</surname> <given-names>K.</given-names></name> <name><surname>Hallin</surname> <given-names>P.</given-names></name> <name><surname>R&#x00F8;dland</surname> <given-names>E. A.</given-names></name> <name><surname>St&#x00E6;rfeldt</surname> <given-names>H. H.</given-names></name> <name><surname>Rognes</surname> <given-names>T.</given-names></name> <name><surname>Ussery</surname> <given-names>D. W.</given-names></name></person-group> (<year>2007</year>). <article-title>RNAmmer: consistent and rapid annotation of ribosomal RNA genes.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>35</volume> <fpage>3100</fpage>&#x2013;<lpage>3108</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkm160</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lalande</surname> <given-names>J.</given-names></name> <name><surname>Villemur</surname> <given-names>R.</given-names></name> <name><surname>Desch&#x00EA;nes</surname> <given-names>L.</given-names></name></person-group> (<year>2013</year>). <article-title>A new framework to accurately quantify soil bacterial community diversity from DGGE.</article-title> <source><italic>Microb. Ecol.</italic></source> <volume>66</volume> <fpage>647</fpage>&#x2013;<lpage>658</lpage>. <pub-id pub-id-type="doi">10.1007/s00248-013-0230-3</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>P.</given-names></name> <name><surname>Xie</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Qin</surname> <given-names>Q.</given-names></name> <name><surname>Dang</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>Genetic structure of three fosmid-fragments encoding 16S rRNA genes of the Miscellaneous Crenarchaeotic Group (MCG): implications for physiology and evolution of marine sedimentary archaea.</article-title> <source><italic>Environ. Microbiol.</italic></source> <volume>14</volume> <fpage>467</fpage>&#x2013;<lpage>479</lpage>. <pub-id pub-id-type="doi">10.1111/j.1462-2920.2011.02637.x</pub-id></citation></ref>
<ref id="B14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liles</surname> <given-names>M. R.</given-names></name> <name><surname>Manske</surname> <given-names>B. F.</given-names></name> <name><surname>Bintrim</surname> <given-names>S. B.</given-names></name> <name><surname>Handelsman</surname> <given-names>J.</given-names></name> <name><surname>Goodman</surname> <given-names>R. M.</given-names></name></person-group> (<year>2003</year>). <article-title>A Census of rRNA genes and linked genomic sequences within a soil metagenomic library.</article-title> <source><italic>Appl. Environ. Microbiol.</italic></source> <volume>69</volume> <fpage>2684</fpage>&#x2013;<lpage>2691</lpage>. <pub-id pub-id-type="doi">10.1128/AEM.69.5.2684-2691.2003</pub-id></citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Feng</surname> <given-names>L.</given-names></name> <name><surname>Cao</surname> <given-names>H.</given-names></name> <name><surname>Cui</surname> <given-names>Z.</given-names></name></person-group> (<year>2010</year>). <article-title>An improved method for extracting bacteria from soil for high molecular weight DNA recovery and BAC library construction.</article-title> <source><italic>J. Microbiol.</italic></source> <volume>48</volume> <fpage>728</fpage>&#x2013;<lpage>733</lpage>. <pub-id pub-id-type="doi">10.1007/s12275-010-0139-1</pub-id></citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>W.</given-names></name> <name><surname>Zhao</surname> <given-names>X.</given-names></name> <name><surname>Shen</surname> <given-names>W.-J.</given-names></name> <name><surname>Cao</surname> <given-names>H.</given-names></name> <name><surname>Cui</surname> <given-names>Z.-L.</given-names></name></person-group> (<year>2011</year>). <article-title>Cloning and functional characterization of a novel endo-&#x03B2;-1,4-glucanase gene from a soil-derived metagenomic library.</article-title> <source><italic>Appl. Microbiol. Biotechnol.</italic></source> <volume>89</volume> <fpage>1083</fpage>&#x2013;<lpage>1092</lpage>. <pub-id pub-id-type="doi">10.1007/s00253-010-2828-4</pub-id></citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lundberg</surname> <given-names>D. S.</given-names></name> <name><surname>Lebeis</surname> <given-names>S. L.</given-names></name> <name><surname>Paredes</surname> <given-names>S. H.</given-names></name> <name><surname>Yourstone</surname> <given-names>S.</given-names></name> <name><surname>Gehring</surname> <given-names>J.</given-names></name> <name><surname>Malfatti</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>Defining the core <italic>Arabidopsis thaliana</italic> root microbiome.</article-title> <source><italic>Nature</italic></source> <volume>488</volume> <fpage>86</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.1038/nature11237</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mai</surname> <given-names>Z.</given-names></name> <name><surname>Su</surname> <given-names>H.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Huang</surname> <given-names>S.</given-names></name> <name><surname>Zhang</surname> <given-names>S.</given-names></name></person-group> (<year>2014</year>). <article-title>Cloning and characterization of a novel GH44 family endoglucanase from mangrove soil metagenomic library.</article-title> <source><italic>Biotechnol. Lett.</italic></source> <volume>36</volume> <fpage>1701</fpage>&#x2013;<lpage>1709</lpage>. <pub-id pub-id-type="doi">10.1007/s10529-014-1531-4</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Markowitz</surname> <given-names>V. M.</given-names></name> <name><surname>Chen</surname> <given-names>I. M. A.</given-names></name> <name><surname>Chu</surname> <given-names>K.</given-names></name> <name><surname>Szeto</surname> <given-names>E.</given-names></name> <name><surname>Palaniappan</surname> <given-names>K.</given-names></name> <name><surname>Pillay</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2014a</year>). <article-title>IMG/M 4 version of the integrated metagenome comparative analysis system.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>42</volume> <fpage>D568</fpage>&#x2013;<lpage>D573</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkt919</pub-id></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Markowitz</surname> <given-names>V. M.</given-names></name> <name><surname>Chen</surname> <given-names>I. M. A.</given-names></name> <name><surname>Palaniappan</surname> <given-names>K.</given-names></name> <name><surname>Chu</surname> <given-names>K.</given-names></name> <name><surname>Szeto</surname> <given-names>E.</given-names></name> <name><surname>Pillay</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2014b</year>). <article-title>IMG 4 version of the integrated microbial genomes comparative analysis system.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>42</volume> <fpage>D560</fpage>&#x2013;<lpage>D567</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkt963</pub-id></citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Maron</surname> <given-names>P. A.</given-names></name> <name><surname>Maitre</surname> <given-names>M.</given-names></name> <name><surname>Mercier</surname> <given-names>A.</given-names></name> <name><surname>Henri Lejon</surname> <given-names>D. P.</given-names></name> <name><surname>Nowak</surname> <given-names>V.</given-names></name> <name><surname>Ranjard</surname> <given-names>L.</given-names></name></person-group> (<year>2008</year>). <article-title>Protein and DNA fingerprinting of a soil bacterial community inoculated into three different sterile soils.</article-title> <source><italic>Res. Microbiol.</italic></source> <volume>159</volume> <fpage>231</fpage>&#x2013;<lpage>236</lpage>. <pub-id pub-id-type="doi">10.1016/j.resmic.2008.03.004</pub-id></citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Massana</surname> <given-names>R.</given-names></name> <name><surname>Karniol</surname> <given-names>B.</given-names></name> <name><surname>Pommier</surname> <given-names>T.</given-names></name> <name><surname>Bodaker</surname> <given-names>I.</given-names></name> <name><surname>B&#x00E9;j&#x00E0;</surname> <given-names>O.</given-names></name></person-group> (<year>2008</year>). <article-title>Metagenomic retrieval of a ribosomal DNA repeat array from an uncultured marine alveolate.</article-title> <source><italic>Environ. Microbiol.</italic></source> <volume>10</volume> <fpage>1335</fpage>&#x2013;<lpage>1343</lpage>. <pub-id pub-id-type="doi">10.1111/j.1462-2920.2007.01549.x</pub-id></citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Noble</surname> <given-names>P. A.</given-names></name> <name><surname>Citek</surname> <given-names>R. W.</given-names></name> <name><surname>Ogunseitan</surname> <given-names>O. A.</given-names></name></person-group> (<year>1998</year>). <article-title>Tetranucleotide frequencies in microbial genomes.</article-title> <source><italic>Electrophoresis</italic></source> <volume>19</volume> <fpage>528</fpage>&#x2013;<lpage>535</lpage>. <pub-id pub-id-type="doi">10.1002/elps.1150190412</pub-id></citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ochman</surname> <given-names>H.</given-names></name> <name><surname>Bergthorsson</surname> <given-names>U.</given-names></name></person-group> (<year>1995</year>). <article-title>Genome evolution in enteric bacteria.</article-title> <source><italic>Curr. Opin. Genet. Dev.</italic></source> <volume>5</volume> <fpage>734</fpage>&#x2013;<lpage>738</lpage>. <pub-id pub-id-type="doi">10.1016/0959-437x(95)80005-P</pub-id></citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pati</surname> <given-names>A.</given-names></name> <name><surname>Ivanova</surname> <given-names>N. N.</given-names></name> <name><surname>Mikhailova</surname> <given-names>N.</given-names></name> <name><surname>Ovchinnikova</surname> <given-names>G.</given-names></name> <name><surname>Hooper</surname> <given-names>S. D.</given-names></name> <name><surname>Lykidis</surname> <given-names>A.</given-names></name><etal/></person-group> (<year>2010</year>). <article-title>GenePRIMP: a gene prediction improvement pipeline for prokaryotic genomes.</article-title> <source><italic>Nat. Methods</italic></source> <volume>7</volume> <fpage>455</fpage>&#x2013;<lpage>457</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.1457</pub-id></citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Peng</surname> <given-names>Q.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Shang</surname> <given-names>M.</given-names></name> <name><surname>Huang</surname> <given-names>J.</given-names></name> <name><surname>Guan</surname> <given-names>G.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Isolation of a novel alkaline-stable lipase from a metagenomic library and its specific application for milkfat flavor production.</article-title> <source><italic>Microb. Cell Fact.</italic></source> <volume>13</volume>:<issue>1</issue>. <pub-id pub-id-type="doi">10.1186/1475-2859-13-1</pub-id></citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Prosser</surname> <given-names>J. I.</given-names></name></person-group> (<year>2015</year>). <article-title>Dispersing misconceptions and identifying opportunities for the use of &#x201C;omics&#x201D; in soil microbial ecology.</article-title> <source><italic>Nat. Rev. Microbiol.</italic></source> <volume>13</volume> <fpage>439</fpage>&#x2013;<lpage>446</lpage>. <pub-id pub-id-type="doi">10.1038/nrmicro3468</pub-id></citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rinke</surname> <given-names>C.</given-names></name> <name><surname>Schwientek</surname> <given-names>P.</given-names></name> <name><surname>Sczyrba</surname> <given-names>A.</given-names></name> <name><surname>Ivanova</surname> <given-names>N. N.</given-names></name> <name><surname>Anderson</surname> <given-names>I. J.</given-names></name> <name><surname>Cheng</surname> <given-names>J.-F.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>Insights into the phylogeny and coding potential of microbial dark matter.</article-title> <source><italic>Nature</italic></source> <volume>499</volume> <fpage>431</fpage>&#x2013;<lpage>437</lpage>. <pub-id pub-id-type="doi">10.1038/nature12352</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rondon</surname> <given-names>M. R.</given-names></name> <name><surname>August</surname> <given-names>P. R.</given-names></name> <name><surname>Bettermann</surname> <given-names>A. D.</given-names></name> <name><surname>Brady</surname> <given-names>S. F.</given-names></name> <name><surname>Grossman</surname> <given-names>T. H.</given-names></name> <name><surname>Liles</surname> <given-names>M. R.</given-names></name><etal/></person-group> (<year>2000</year>). <article-title>Cloning the soil metagenome: a strategy for accessing the genetic and functional diversity of uncultured microorganisms.</article-title> <source><italic>Appl. Environ. Microbiol.</italic></source> <volume>66</volume> <fpage>2541</fpage>&#x2013;<lpage>2547</lpage>. <pub-id pub-id-type="doi">10.1128/AEM.66.6.2541-2547.2000</pub-id></citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schbath</surname> <given-names>S.</given-names></name> <name><surname>Prum</surname> <given-names>B.</given-names></name> <name><surname>de Turckheim</surname> <given-names>E.</given-names></name></person-group> (<year>1995</year>). <article-title>Exceptional motifs in different Markov chain models for a statistical analysis of DNA sequences.</article-title> <source><italic>J. Comput. Biol.</italic></source> <volume>2</volume> <fpage>417</fpage>&#x2013;<lpage>437</lpage>. <pub-id pub-id-type="doi">10.1089/cmb.1995.2.417</pub-id></citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Streit</surname> <given-names>W. R.</given-names></name> <name><surname>Schmitz</surname> <given-names>R. A.</given-names></name></person-group> (<year>2004</year>). <article-title>Metagenomics &#x2013; the key to the uncultured microbes.</article-title> <source><italic>Curr. Opin. Microbiol.</italic></source> <volume>7</volume> <fpage>492</fpage>&#x2013;<lpage>498</lpage>. <pub-id pub-id-type="doi">10.1016/j.mib.2004.08.002</pub-id></citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Suzuki</surname> <given-names>M. T.</given-names></name> <name><surname>Rapp&#x00E9;</surname> <given-names>M. S.</given-names></name> <name><surname>Haimberger</surname> <given-names>Z. W.</given-names></name> <name><surname>Winfield</surname> <given-names>H.</given-names></name> <name><surname>Adair</surname> <given-names>N.</given-names></name> <name><surname>Str&#x00F6;bel</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>1997</year>). <article-title>Bacterial diversity among small-subunit rRNA gene clones and cellular isolates from the same seawater sample.</article-title> <source><italic>Appl. Environ. Microbiol.</italic></source> <volume>63</volume> <fpage>983</fpage>&#x2013;<lpage>989</lpage>.</citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tamura</surname> <given-names>K.</given-names></name> <name><surname>Nei</surname> <given-names>M.</given-names></name></person-group> (<year>1993</year>). <article-title>Estimation of the number of nucleotide substitutions in the control region of mitochondrial DNA in humans and chimpanzees.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>10</volume> <fpage>512</fpage>&#x2013;<lpage>526</lpage>.</citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tamura</surname> <given-names>K.</given-names></name> <name><surname>Stecher</surname> <given-names>G.</given-names></name> <name><surname>Peterson</surname> <given-names>D.</given-names></name> <name><surname>Filipski</surname> <given-names>A.</given-names></name> <name><surname>Kumar</surname> <given-names>S.</given-names></name></person-group> (<year>2013</year>). <article-title>MEGA6: molecular evolutionary genetics analysis version 6.0.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>30</volume> <fpage>2725</fpage>&#x2013;<lpage>2729</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/mst197</pub-id></citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tatusov</surname> <given-names>R. L.</given-names></name> <name><surname>Fedorova</surname> <given-names>N. D.</given-names></name> <name><surname>Jackson</surname> <given-names>J. D.</given-names></name> <name><surname>Jacobs</surname> <given-names>A. R.</given-names></name> <name><surname>Kiryutin</surname> <given-names>B.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name><etal/></person-group> (<year>2003</year>). <article-title>The COG database: an updated version includes eukaryotes.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>4</volume>:<issue>41</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-4-41</pub-id></citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Teeling</surname> <given-names>H.</given-names></name> <name><surname>Meyerdierks</surname> <given-names>A.</given-names></name> <name><surname>Bauer</surname> <given-names>M.</given-names></name> <name><surname>Amann</surname> <given-names>R.</given-names></name> <name><surname>Gl&#x00F6;ckner</surname> <given-names>F. O.</given-names></name></person-group> (<year>2004a</year>). <article-title>Application of tetranucleotide frequencies for the assignment of genomic fragments.</article-title> <source><italic>Environ. Microbiol.</italic></source> <volume>6</volume> <fpage>938</fpage>&#x2013;<lpage>947</lpage>. <pub-id pub-id-type="doi">10.1111/j.1462-2920.2004.00624.x</pub-id></citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Teeling</surname> <given-names>H.</given-names></name> <name><surname>Waldmann</surname> <given-names>J.</given-names></name> <name><surname>Lombardot</surname> <given-names>T.</given-names></name> <name><surname>Bauer</surname> <given-names>M.</given-names></name> <name><surname>Gl&#x00F6;ckner</surname> <given-names>F. O.</given-names></name></person-group> (<year>2004b</year>). <article-title>TETRA: a web-service and a stand-alone program for the analysis and comparison of tetranucleotide usage patterns in DNA sequences.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>5</volume>:<issue>163</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-5-163</pub-id></citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Torsvik</surname> <given-names>V.</given-names></name> <name><surname>&#x00D8;vre&#x00E5;s</surname> <given-names>L.</given-names></name></person-group> (<year>2002</year>). <article-title>Microbial diversity and function in soil: from genes to ecosystems.</article-title> <source><italic>Curr. Opin. Microbiol.</italic></source> <volume>5</volume> <fpage>240</fpage>&#x2013;<lpage>245</lpage>. <pub-id pub-id-type="doi">10.1016/S1369-5274(02)00324-7</pub-id></citation></ref>
<ref id="B39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tu</surname> <given-names>Q.</given-names></name> <name><surname>Yu</surname> <given-names>H.</given-names></name> <name><surname>He</surname> <given-names>Z.</given-names></name> <name><surname>Deng</surname> <given-names>Y.</given-names></name> <name><surname>Wu</surname> <given-names>L.</given-names></name> <name><surname>Van Nostrand</surname> <given-names>J. D.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>GeoChip 4: a functional gene-array-based high-throughput environmental technology for microbial community analysis.</article-title> <source><italic>Mol. Ecol. Resour.</italic></source> <volume>14</volume> <fpage>914</fpage>&#x2013;<lpage>928</lpage>. <pub-id pub-id-type="doi">10.1111/1755-0998.12239</pub-id></citation></ref>
<ref id="B40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Urich</surname> <given-names>T.</given-names></name> <name><surname>Lanz&#x00E9;n</surname> <given-names>A.</given-names></name> <name><surname>Qi</surname> <given-names>J.</given-names></name> <name><surname>Huson</surname> <given-names>D. H.</given-names></name> <name><surname>Schleper</surname> <given-names>C.</given-names></name> <name><surname>Schuster</surname> <given-names>S. C.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>Simultaneous assessment of soil microbial community structure and function through analysis of the meta-transcriptome.</article-title> <source><italic>PLoS ONE</italic></source> <volume>3</volume>:<issue>e2527</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0002527</pub-id></citation></ref>
<ref id="B41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vavourakis</surname> <given-names>C. D.</given-names></name> <name><surname>Ghai</surname> <given-names>R.</given-names></name> <name><surname>Rodriguez-Valera</surname> <given-names>F.</given-names></name> <name><surname>Sorokin</surname> <given-names>D. Y.</given-names></name> <name><surname>Tringe</surname> <given-names>S. G.</given-names></name> <name><surname>Hugenholtz</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Metagenomic insights into the uncultured diversity and physiology of microbes in four hypersaline soda lake brines.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>7</volume>:<issue>211</issue>. <pub-id pub-id-type="doi">10.3389/fmicb.2016.00211</pub-id></citation></ref>
<ref id="B42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Q.</given-names></name> <name><surname>Garrity</surname> <given-names>G. M.</given-names></name> <name><surname>Tiedje</surname> <given-names>J. M.</given-names></name> <name><surname>Cole</surname> <given-names>J. R.</given-names></name></person-group> (<year>2007</year>). <article-title>Na&#x00EF;ve Bayesian classifier for rapid assignment of rRNA sequences into the new bacterial taxonomy.</article-title> <source><italic>Appl. Environ. Microbiol.</italic></source> <volume>73</volume> <fpage>5261</fpage>&#x2013;<lpage>5267</lpage>. <pub-id pub-id-type="doi">10.1128/AEM.00062-07</pub-id></citation></ref>
<ref id="B43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Williams</surname> <given-names>A. V.</given-names></name> <name><surname>Nevill</surname> <given-names>P. G.</given-names></name> <name><surname>Krauss</surname> <given-names>S. L.</given-names></name></person-group> (<year>2014</year>). <article-title>Next generation restoration genetics: applications and opportunities.</article-title> <source><italic>Trends Plant Sci.</italic></source> <volume>19</volume> <fpage>529</fpage>&#x2013;<lpage>537</lpage>. <pub-id pub-id-type="doi">10.1016/j.tplants.2014.03.011</pub-id></citation></ref>
<ref id="B44"><citation citation-type="book"><person-group person-group-type="editor"><name><surname>Wilson</surname> <given-names>M. J.</given-names></name> <name><surname>He</surname> <given-names>Z.</given-names></name> <name><surname>Yang</surname> <given-names>X.</given-names></name></person-group> (eds) (<year>2004</year>). <source><italic>The Red Soils of China.</italic></source> <publisher-loc>Dordrecht:</publisher-loc> <publisher-name>Springer</publisher-name>. <pub-id pub-id-type="doi">10.1007/978-1-4020-2138-1</pub-id></citation></ref>
<ref id="B45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>R.</given-names></name> <name><surname>Zhao</surname> <given-names>A.</given-names></name> <name><surname>Li</surname> <given-names>Q.</given-names></name> <name><surname>Kong</surname> <given-names>X.</given-names></name> <name><surname>Ji</surname> <given-names>G.</given-names></name></person-group> (<year>2003</year>). <article-title>Acidity regime of the Red Soils in a subtropical region of southern China under field conditions.</article-title> <source><italic>Geoderma</italic></source> <volume>115</volume> <fpage>75</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1016/S0016-7061(03)00077-6</pub-id></citation></ref>
<ref id="B46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yamada</surname> <given-names>T.</given-names></name> <name><surname>Sekiguchi</surname> <given-names>Y.</given-names></name></person-group> (<year>2009</year>). <article-title>Cultivation of uncultured Chloroflexi subphyla: significance and ecophysiology of formerly uncultured Chloroflexi &#x201C;subphylum I&#x201D; with natural and biotechnological relevance.</article-title> <source><italic>Microbes Environ.</italic></source> <volume>24</volume> <fpage>205</fpage>&#x2013;<lpage>216</lpage>. <pub-id pub-id-type="doi">10.1264/jsme2.ME09151S</pub-id></citation></ref>
<ref id="B47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zheng</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Liu</surname> <given-names>L.</given-names></name> <name><surname>Jin</surname> <given-names>Q.</given-names></name></person-group> (<year>2013</year>). <article-title>Characterisation of a thermo-alkali-stable lipase from oil-contaminated soil using a metagenomic approach.</article-title> <source><italic>Syst. Appl. Microbiol.</italic></source> <volume>36</volume> <fpage>197</fpage>&#x2013;<lpage>204</lpage>. <pub-id pub-id-type="doi">10.1016/j.syapm.2012.12.008</pub-id></citation></ref>
<ref id="B48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>J.</given-names></name> <name><surname>Deng</surname> <given-names>Y.</given-names></name> <name><surname>He</surname> <given-names>Z.</given-names></name> <name><surname>Wu</surname> <given-names>L.</given-names></name> <name><surname>Van Nostrand</surname> <given-names>J. D.</given-names></name></person-group> (<year>2010</year>). <article-title>Applying GeoChip analysis to disparate microbial communities.</article-title> <source><italic>Microbe Mag.</italic></source> <volume>5</volume> <fpage>60</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1128/microbe.5.60.1</pub-id></citation></ref>
</ref-list>
<fn-group>
<fn id="fn01"><label>1</label><p><ext-link ext-link-type="uri" xlink:href="https://github.com/YunpengLiu/Tetra-nucleotide-analysis">https://github.com/YunpengLiu/Tetra-nucleotide-analysis</ext-link></p></fn>
</fn-group>
</back>
</article>