<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Microbiol.</journal-id>
<journal-title>Frontiers in Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Microbiol.</abbrev-journal-title>
<issn pub-type="epub">1664-302X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmicb.2022.1069452</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Microbiology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Global phylogenomic novelty of the Cas1 gene from hot spring microbial communities</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Salgado</surname> <given-names>Oscar</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/264812/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Guajardo-Leiva</surname> <given-names>Sergio</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/537386/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Moya-Beltr&#x00E1;n</surname> <given-names>Ana</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/396693/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Barbosa</surname> <given-names>Carla</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2097161/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Ridley</surname> <given-names>Christina</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/406742/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Tamayo-Leiva</surname> <given-names>Javier</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Quatrini</surname> <given-names>Raquel</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff8"><sup>8</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/29631/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mojica</surname> <given-names>Francisco J. M.</given-names></name>
<xref ref-type="aff" rid="aff9"><sup>9</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2052307/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>D&#x00ED;ez</surname> <given-names>Beatriz</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff10"><sup>10</sup></xref>
<xref ref-type="aff" rid="aff11"><sup>11</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/313648/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Molecular Genetics and Microbiology, Biological Sciences Faculty, Pontifical Catholic University of Chile</institution>, <addr-line>Santiago</addr-line>, <country>Chile</country></aff>
<aff id="aff2"><sup>2</sup><institution>N&#x00FA;cleo de Ciencias Naturales y Exactas, Universidad Adventista de Chile</institution>, <addr-line>Chill&#x00E1;n</addr-line>, <country>Chile</country></aff>
<aff id="aff3"><sup>3</sup><institution>Departamento de Microbiolog&#x00ED;a, Universidad de Talca</institution>, <addr-line>Talca</addr-line>, <country>Chile</country></aff>
<aff id="aff4"><sup>4</sup><institution>Centro de Ecolog&#x00ED;a Integrativa, Universidad de Talca</institution>, <addr-line>Talca</addr-line>, <country>Chile</country></aff>
<aff id="aff5"><sup>5</sup><institution>Centro Cient&#x00ED;fico y Tecnol&#x00F3;gico de Excelencia Ciencia &#x0026; Vida</institution>, <addr-line>Santiago</addr-line>, <country>Chile</country></aff>
<aff id="aff6"><sup>6</sup><institution>Departamento de Geolog&#x00ED;a, Facultad de Ciencias F&#x00ED;sicas y Matem&#x00E1;ticas, Universidad de Chile</institution>, <addr-line>Santiago</addr-line>, <country>Chile</country></aff>
<aff id="aff7"><sup>7</sup><institution>Centro de Excelencia en Geotermia de Los Andes (CEGA-Fondap)</institution>, <addr-line>Santiago</addr-line>, <country>Chile</country></aff>
<aff id="aff8"><sup>8</sup><institution>Facultad de Medicina y Ciencia, Universidad San Sebasti&#x00E1;n</institution>, <addr-line>Santiago</addr-line>, <country>Chile</country></aff>
<aff id="aff9"><sup>9</sup><institution>Departamento de Fisiolog&#x00ED;a, Gen&#x00E9;tica y Microbiolog&#x00ED;a, Universidad de Alicante</institution>, <addr-line>Alicante</addr-line>, <country>Spain</country></aff>
<aff id="aff10"><sup>10</sup><institution>Center for Climate and Resilience Research (CR)<sup>2</sup></institution>, <addr-line>Santiago</addr-line>, <country>Chile</country></aff>
<aff id="aff11"><sup>11</sup><institution>Millennium Institute Center for Genome Regulation (CGR)</institution>, <addr-line>Santiago</addr-line>, <country>Chile</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Mar&#x00ED;a Sof&#x00ED;a Urbieta, CONICET &#x2013; UNLP, Argentina</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Leandro Guerrero, CONICET Instituto de Investigaciones en Ingenier&#x00ED;a Gen&#x00E9;tica y Biolog&#x00ED;a Molecular Dr. H&#x00E9;ctor N. Torres (INGEBI), Argentina; Devaki Bhaya, Carnegie Institution for Science (CIS), United States; Roger Garrett, University of Copenhagen, Denmark</p></fn>
<corresp id="c001">&#x002A;Correspondence: Beatriz D&#x00ED;ez, <email>bdiez@bio.puc.cl</email></corresp>
<fn fn-type="other" id="fn004"><p>This article was submitted to Extreme Microbiology, a section of the journal Frontiers in Microbiology</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>02</day>
<month>12</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1069452</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>11</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2022 Salgado, Guajardo-Leiva, Moya-Beltr&#x00E1;n, Barbosa, Ridley, Tamayo-Leiva, Quatrini, Mojica and D&#x00ED;ez.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Salgado, Guajardo-Leiva, Moya-Beltr&#x00E1;n, Barbosa, Ridley, Tamayo-Leiva, Quatrini, Mojica and D&#x00ED;ez</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>The Cas1 protein is essential for the functioning of CRISPR-Cas adaptive systems. However, despite the high prevalence of CRISPR-Cas systems in thermophilic microorganisms, few studies have investigated the occurrence and diversity of Cas1 across hot spring microbial communities. Phylogenomic analysis of 2,150 Cas1 sequences recovered from 48 metagenomes representing hot springs (42&#x2013;80&#x00B0;C, pH 6&#x2013;9) from three continents revealed similar ecological diversity of Cas1 and 16S rRNA associated with geographic location. Furthermore, phylogenetic analysis of the Cas1 sequences exposed a broad taxonomic distribution in thermophilic bacteria, with new clades of Cas1 homologs branching at the root of the tree or at the root of known clades harboring reference Cas1 types. Additionally, a new family of casposases was identified from hot springs, which further completes the evolutionary landscape of the Cas1 superfamily. This ecological study contributes new Cas1 sequences from known and novel locations worldwide, mainly focusing on under-sampled hot spring microbial mat taxa. Results herein show that circumneutral hot springs are environments harboring high diversity and novelty related to adaptive immunity systems.</p>
</abstract>
<kwd-group>
<kwd>Cas1</kwd>
<kwd>hot spring</kwd>
<kwd>phylogenomic</kwd>
<kwd>CRISPR-Cas</kwd>
<kwd>casposase</kwd>
</kwd-group>
<contract-num rid="cn001">1190998</contract-num>
<contract-num rid="cn001">21172022</contract-num>
<contract-num rid="cn001">21171048</contract-num>
<contract-num rid="cn001">3210547</contract-num>
<contract-num rid="cn001">FB210008</contract-num>
<contract-num rid="cn001">1221035</contract-num>
<contract-num rid="cn004">PROMETEO/2021/057</contract-num>
<contract-sponsor id="cn001">Agencia Nacional de Investigaci&#x00F3;n y Desarrollo<named-content content-type="fundref-id">10.13039/501100020884</named-content></contract-sponsor>
<contract-sponsor id="cn002">Agencia Nacional de Investigaci&#x00F3;n y Desarrollo<named-content content-type="fundref-id">10.13039/501100020884</named-content></contract-sponsor>
<contract-sponsor id="cn003">Agencia Nacional de Investigaci&#x00F3;n y Desarrollo<named-content content-type="fundref-id">10.13039/501100020884</named-content></contract-sponsor>
<contract-sponsor id="cn004">Conselleria de Innovaci&#x00F3;n, Universidades, Ciencia y Sociedad Digital, Generalitat Valenciana<named-content content-type="fundref-id">10.13039/501100016386</named-content></contract-sponsor>
<counts>
<fig-count count="7"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="102"/>
<page-count count="16"/>
<word-count count="11233"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1" sec-type="intro">
<title>Introduction</title>
<p>Adaptive immunity in <italic>Bacteria</italic> and <italic>Archaea</italic> is achieved by CRISPR-Cas (clustered regularly interspaced short palindromic repeats and CRISPR-associated genes) systems (<xref ref-type="bibr" rid="B72">Mohanraju et al., 2016</xref>; <xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>). When a foreign nucleic acid invades a prokaryotic cell, it can be recognized by adaptation Cas proteins generating small fragments (spacers) that are stored in a CRISPR array of the host genome separated by the repeats (<xref ref-type="bibr" rid="B68">McGinn and Marraffini, 2019</xref>). This information establishes an immunological memory in these cells because spacer-containing transcripts will guide effector Cas to cleave the invasive nucleic acid in future encounters. In most cases, the invading nucleic acid corresponds to viruses (<xref ref-type="bibr" rid="B86">Shmakov et al., 2017a</xref>). Four modules, composed of different Cas, have been defined in the functioning of CRISPR-Cas systems (<xref ref-type="bibr" rid="B60">Makarova et al., 2015</xref>). The adaptation module integrates the spacers into the host through a complex formed by Cas1 and Cas2, assisted by non-Cas and sometimes other Cas proteins (e.g., Cas4, Cas3, or Cas9) (<xref ref-type="bibr" rid="B41">Koonin and Krupovic, 2015</xref>; <xref ref-type="bibr" rid="B4">Amitai and Sorek, 2016</xref>; <xref ref-type="bibr" rid="B37">Jackson et al., 2017</xref>). Subsequently, the expression module processes multi-spacer transcripts from the CRISPR array (pre-crRNA) to deliver single spacer-containing RNA fragments (crRNAs) that guide Cas of the interference module to act against the invading nucleic acid recognized through complementary base pairing with the spacer sequences (<xref ref-type="bibr" rid="B30">Hille et al., 2018</xref>). 
Finally, several proteins or domains in the ancillary/helper module have been described as playing accessory roles (<xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>). CRISPR-Cas systems are categorized into two classes, six types and over thirty subtypes that could harbor a unique (signature) type gene, and differ in the identity of the associated <italic>cas</italic> genes, mainly those encoding the interference module (<xref ref-type="bibr" rid="B43">Koonin et al., 2017</xref>; <xref ref-type="bibr" rid="B87">Shmakov et al., 2017b</xref>; <xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>).</p>
<p>Some CRISPR-Cas systems have been extensively characterized, primarily due to their biotechnological relevance, which has also encouraged the search for new variants in nature (<xref ref-type="bibr" rid="B12">Burstein et al., 2017</xref>). However, in the existing databases, some environments are more represented than others, such as human clinical samples versus environmental samples. Among the latter, hot springs are significant for studying CRISPR-Cas because these molecular systems are widespread in the indigenous microorganisms, whether thermophiles or hyperthermophiles (<xref ref-type="bibr" rid="B7">Anderson et al., 2011</xref>; <xref ref-type="bibr" rid="B97">Weinberger et al., 2012</xref>; <xref ref-type="bibr" rid="B98">Weissman et al., 2019</xref>). It has been suggested that temperature impacts the viral diversity and density in these environments by decreasing mutation rates, thereby influencing the occurrence of CRISPR-Cas systems in the host microorganisms (<xref ref-type="bibr" rid="B97">Weinberger et al., 2012</xref>; <xref ref-type="bibr" rid="B36">Iranzo et al., 2013</xref>; <xref ref-type="bibr" rid="B18">Childs et al., 2014</xref>; <xref ref-type="bibr" rid="B99">Westra et al., 2016</xref>). Lower mutation rates in thermal environments are explained by the deleterious effect of substitutions at high temperatures (<xref ref-type="bibr" rid="B21">Drake, 2009</xref>) which would define a less diverse community than in mesophilic environments. In thermal environments, the lower virus-prokaryote ratio and lower viral community diversity (<xref ref-type="bibr" rid="B78">Parmar et al., 2018</xref>) translate into a lower metabolic cost for the maintenance of the CRISPR-Cas systems against viral infection compared to mesophilic environments (<xref ref-type="bibr" rid="B99">Westra et al., 2016</xref>). 
In this last environment, the spacer catalog has to adapt to more diverse invading nucleic acids (<xref ref-type="bibr" rid="B97">Weinberger et al., 2012</xref>; <xref ref-type="bibr" rid="B36">Iranzo et al., 2013</xref>; <xref ref-type="bibr" rid="B95">Vale et al., 2015</xref>; <xref ref-type="bibr" rid="B100">Westra et al., 2015</xref>; <xref ref-type="bibr" rid="B13">Burstein et al., 2016</xref>; <xref ref-type="bibr" rid="B96">Van Houte et al., 2016</xref>; <xref ref-type="bibr" rid="B11">Broniewski et al., 2020</xref>; <xref ref-type="bibr" rid="B69">Meaden et al., 2021</xref>). Beyond the high presence of CRISPR-Cas systems in thermophiles, these environments exhibit low microbial complexity compared to mesophilic environments, with fewer microorganisms harboring CRISPR-Cas systems (<xref ref-type="bibr" rid="B13">Burstein et al., 2016</xref>; <xref ref-type="bibr" rid="B98">Weissman et al., 2019</xref>; <xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>).</p>
<p>A genetic marker for all CRISPR-Cas systems cannot be established (<xref ref-type="bibr" rid="B42">Koonin and Makarova, 2019</xref>; <xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>). However, the gene encoding the Cas1 protein is the most widespread and evolutionarily conserved <italic>cas</italic> gene (<xref ref-type="bibr" rid="B60">Makarova et al., 2015</xref>, <xref ref-type="bibr" rid="B62">2020a</xref>; <xref ref-type="bibr" rid="B42">Koonin and Makarova, 2019</xref>) and is essential for CRISPR-Cas adaptive immunity (<xref ref-type="bibr" rid="B48">Krupovic et al., 2014</xref>; <xref ref-type="bibr" rid="B4">Amitai and Sorek, 2016</xref>; <xref ref-type="bibr" rid="B37">Jackson et al., 2017</xref>). Therefore, <italic>cas1</italic> has been used to study the ecology of CRISPR-Cas (<xref ref-type="bibr" rid="B101">Wu et al., 2020</xref>). Notably, a protein family composed of Cas1 homologs, called casposases, which is related to the transposition of the carrier mobile genetic element (casposon) has been identified (<xref ref-type="bibr" rid="B48">Krupovic et al., 2014</xref>, <xref ref-type="bibr" rid="B49">2016</xref>). The Casposon superfamily has been proposed to underlie the emergence of CRISPR-Cas systems, with their terminal inverted repeats (TIRs) and casposases being the presumed ancestors of CRISPR and CRISPR-associated Cas1, respectively (<xref ref-type="bibr" rid="B41">Koonin and Krupovic, 2015</xref>; <xref ref-type="bibr" rid="B46">Krupovic and Koonin, 2016</xref>; <xref ref-type="bibr" rid="B72">Mohanraju et al., 2016</xref>).</p>
<p>The diversity of Cas1 homologs discovered in new taxa and recently explored environments suggests functions other than those described for immunity (<xref ref-type="bibr" rid="B63">Makarova et al., 2020b</xref>), which encourages its study in the natural environment. Despite the high prevalence of CRISPR-Cas systems in hyper/thermophiles, the phylogenomics of the Cas1 protein has not been extensively explored in thermal environments of circumneutral pH. Therefore, to deepen our understanding of the relevance of CRISPR-Cas systems at the community level, the goal of this study was to describe the phylogenetic and environmental diversity of the Cas1 protein in 20 globally distributed hot springs. We hypothesized that these environments harbor new groups of Cas1 homologs not described to date. This study recovered 2,150 Cas1 sequences using 48 metagenomes from 20 hot springs ranging from 42 to 80&#x00B0;C and pH 6 to 9. Our results revealed a correlation between the hot spring dissimilarity observed at Cas1 and taxonomy (16S rRNA), with geographical location as the main explanatory variable of these dissimilarities. Furthermore, several Cas1 homologs did not cluster with reference Cas1 proteins from previously described CRISPR-Cas systems but were positioned at the root of specific phylogenetic groups in the tree. Finally, some Cas1 from hot springs formed a new family of casposases (proposed family 5).</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="S2.SS1">
<title>Study sites, El Tatio sampling, DNA extraction, and sequencing</title>
<p>In this study, we defined a thermophilic temperature range between 40 and 80&#x00B0;C and an approximately neutral pH (6&#x2013;9) as the most relevant physicochemical parameters to retain hot spring samples (<xref ref-type="bibr" rid="B71">Meyer-Dombard et al., 2005</xref>; <xref ref-type="bibr" rid="B34">Inskeep et al., 2010</xref>; <xref ref-type="bibr" rid="B57">L&#x00F3;pez-L&#x00F3;pez et al., 2013</xref>; <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 1</xref>). Parameters excluded most acidophilic prokaryotes and hyperthermophilic archaea, for which characterization of CRISPR-Cas systems has been previously described (<xref ref-type="bibr" rid="B8">Andersson and Banfield, 2008</xref>; <xref ref-type="bibr" rid="B94">Tyson and Banfield, 2008</xref>). In total, we analyzed 48 metagenomes, 35 of which were from publicly available data representing sites in North America, South America, and Asia, while 13 were obtained in this study from microbial mats within the El Tatio geyser field, Chile (<xref ref-type="fig" rid="F1">Figure 1</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 2</xref>). Some El Tatio metagenomes slightly exceeded defined limits (82&#x00B0;C, pH 9.27, <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 1</xref>), but were retained since their 16S rRNA profiles were similar to other samples from the same location (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 3</xref>). The geographic coordinates, physicochemical parameters, DNA source, and accession number of all samples used in this study are available in <xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Geographic locations of hot springs used in this study <bold>(left)</bold>, with the number of metagenomic data sets in parentheses (proportional to circle size). The temperature and pH limits of the survey (white box) and the average temperature and pH of each hot spring site <bold>(right)</bold> are indicated. Detailed metadata of each sample are provided in <xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1069452-g001.tif"/>
</fig>
<p>The 13 El Tatio hot springs were selected to cover the entire geothermal field (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 2</xref>). Temperatures of the mat samples were measured with a forward-looking infrared camera (Fluke TiS45, WA, USA) and were corroborated on the mat with a multiparameter instrument (WTW multi 340i, NY, USA). Triplicate samples of approximately 2 ml were collected with a punch from each microbial mat, kept in cryogenic vials containing RNAlater (Thermo Fisher Scientific, Vilnius, Lithuania), and then stored at &#x2212;80&#x00B0;C until DNA extraction.</p>
<p>DNA extractions of the 13 El Tatio samples were performed according to <xref ref-type="bibr" rid="B2">Alcorta et al. (2018)</xref>. An equimolar amount of DNA (400 ng) from each replicate was pooled and sent in DNAstable tubes (Biomatrica, San Diego, CA, USA) to the Roy J. Carver Biotechnology Center (University of Illinois at Urbana-Champaign, IL, USA), where libraries were prepared using KAPA HyperPrep (Kapa Biosystems, Roche, Basel, Switzerland) and then sequenced on the Illumina NovaSeq 6000 platform (S1 flowcell, 2 &#x00D7; 150 bp). Quality filtering of reads for the El Tatio samples was performed according to <xref ref-type="bibr" rid="B28">Guajardo-Leiva et al. (2018)</xref>.</p>
</sec>
<sec id="S2.SS2">
<title>Metagenome assembly and Cas1 recovery</title>
<p><italic>De novo</italic> assembly was performed for all 48 metagenomes using SPAdes v3.10.1 (-meta) (<xref ref-type="bibr" rid="B9">Bankevich et al., 2012</xref>) except for ERR372908, where MEGAHIT v1.2.9 software (--presets meta-sensitive) (<xref ref-type="bibr" rid="B55">Li et al., 2015</xref>) was used due to memory limitations. Assembly data statistics for all samples used in this study are available in <xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>. The search for Cas1 orthologs across 48 metagenomes was done with the hmmsearch tool of the HMMER v3.3 package (<xref ref-type="bibr" rid="B22">Eddy, 2011</xref>), after ORF prediction with Prodigal v2.6.3 (-p meta) (<xref ref-type="bibr" rid="B32">Hyatt et al., 2010</xref>), using the eight updated Cas1 hidden Markov models published by <xref ref-type="bibr" rid="B101">Wu et al. (2020)</xref>. The <italic>E</italic>-value cut-off (0.01) was set after standardization with one representative sample metagenome (T60, BioSample <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="SAMN15500206">SAMN15500206</ext-link> from BioProject <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA645256">PRJNA645256</ext-link>), for which recovered Cas1 candidates were thoroughly curated as described in <xref ref-type="bibr" rid="B75">Moya-Beltr&#x00E1;n et al. (2019)</xref>. This search yielded 3,556 candidate protein sequences. 
Putative Cas1 orthologs were compared against acknowledged Cas1 families present in the Conserved Domain Database v.3.16 (CDD; <xref ref-type="bibr" rid="B65">Marchler-Bauer et al., 2017</xref>) using CD-search (<xref ref-type="bibr" rid="B64">Marchler-Bauer and Bryant, 2004</xref>), hhsearch (<xref ref-type="bibr" rid="B23">Fidler et al., 2016</xref>), and RPS-BLAST v2.2.26 (<xref ref-type="bibr" rid="B66">Marchler-Bauer et al., 2002</xref>) and were retained if recognized as Cas1 by at least one of the three comparison tools with an <italic>E</italic>-value lower than 0.003 and 0.01 for CD-search and RPS-BLAST, respectively, and a probability higher than 81.6 for hhsearch. Next, a size filter was applied to avoid including possible chimeric proteins and limit the survey to CRISPR-Cas canonical Cas1 (Cas1 representatives smaller than 400 aa (<xref ref-type="bibr" rid="B88">Silas et al., 2017</xref>; <xref ref-type="bibr" rid="B101">Wu et al., 2020</xref>) were recovered). The 3,414 sequences fulfilling this criterion were then filtered by the relative read abundance in each metagenome. Downstream analyses did not consider sequences with an abundance below 0.1% in each sample. The 2,155 recovered Cas1 sequences were filtered for sequence redundancy at 100% amino acid sequence identity to remove sequences absent from public databases, considered here as possibly chimeric. The final data set for this study consisted of 2,150 Cas1 sequences.</p>
</sec>
<sec id="S2.SS3">
<title>Taxonomic assignment of Cas1 proteins</title>
<p>We applied three strategies to assign taxonomy to the 2,150 Cas1 sequences recovered in this study. First, we obtained metagenome-assembled genomes (MAGs) of the 48 metagenome data sets according to <xref ref-type="bibr" rid="B1">Alcorta et al. (2020)</xref>. Taxonomic affiliation of the MAGs was then retrieved using GTDB-tk v0.3.2 software (<xref ref-type="bibr" rid="B16">Chaumeil et al., 2020</xref>) with database version R89, identifying <italic>cas1</italic> sequences in contigs housed in the taxonomically identified MAGs. Second, for <italic>cas1</italic> genes not assigned through MAGs, we used the strategy of <xref ref-type="bibr" rid="B101">Wu et al. (2020)</xref> for Cas1 assignment and performed a BLASTp sequence similarity search against the NCBI nr database. Briefly, the five best hits were sorted according to bit score, with the best hit used for assignment if all hits belonged to the same phylum or, otherwise, only if the identity (%) of the best hit was at least 3% higher than that of the second hit (<xref ref-type="bibr" rid="B101">Wu et al., 2020</xref>). Finally, we checked for consistency in the taxonomic affiliation of the Cas1 sequences by retrieving the taxonomic affiliation of the non-Cas gene found in the vicinity of <italic>cas1</italic> in annotated metagenomic contigs (<xref ref-type="supplementary-material" rid="TS2">Supplementary Table 2</xref>). The assigned taxonomic affiliation of Cas1 sequences is shown in <xref ref-type="fig" rid="F2">Figure 2A</xref>. Amino acid sequences and metadata for each sequence are available in <xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p><bold>(A)</bold> Taxonomic distribution at the domain, phylum, and class level of Cas1 proteins from 48 globally distributed metagenome data sets (sequences over 1% are represented). Detailed taxonomic affiliation, metadata, and the amino acid sequence of each Cas1 protein used here can be retrieved in <xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>. <bold>(B)</bold> Typification of the 2,150 Cas1 used in this study according to CRISPR-Cas system class and type. <sup>1</sup>Number of Cas1 proteins at the phylum level, including the unclassified category. <sup>2</sup>Number of different metagenome samples that harbor members of each phylum. <sup>3</sup>Average of samples where those phyla are present.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1069452-g002.tif"/>
</fig>
</sec>
<sec id="S2.SS4">
<title>Beta-diversity of 16S rRNA and <italic>cas1</italic> genes</title>
<p>For the final set of 2,150 <italic>cas1</italic> genes, reads <italic>per</italic> Kb <italic>per</italic> Gb (RPKG) were calculated for each sample using Bowtie2 (<xref ref-type="bibr" rid="B52">Langmead and Salzberg, 2012</xref>). Coordinate diversity analyses for the Bray&#x2013;Curtis dissimilarity index were plotted using non-metric multidimensional scaling (nMDS) in the R package ampvis2 (<xref ref-type="bibr" rid="B5">Andersen et al., 2018</xref>). To identify sources of variation considering temperature, pH, altitude, and location (Universal Transverse Mercator, UTM, coordinates), permuted multivariate analysis of variance (PERMANOVA) (<xref ref-type="bibr" rid="B6">Anderson, 2001</xref>) was performed with the R package vegan (adonis2, not-sequentially added terms) (<xref ref-type="bibr" rid="B77">Oksanen et al., 2020</xref>). The same analyses were performed on the 2,980 16S rRNA genes (over 0.1% RPKG) obtained with MATAM software (<xref ref-type="bibr" rid="B79">Pericard et al., 2018</xref>). Finally, the Mantel test was used to statistically compare the 16S rRNA and Cas1 gene Bray&#x2013;Curtis matrices. The sequences and taxonomic affiliation of the 16S rRNA genes, as determined with the SILVA 138 SSU database (<xref ref-type="bibr" rid="B81">Quast et al., 2013</xref>), are listed in <xref ref-type="supplementary-material" rid="TS4">Supplementary Table 4</xref>, while rarefaction curves are shown in <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 4</xref>.</p>
</sec>
<sec id="S2.SS5">
<title>Cas1 subtypes and phylogenetic analysis</title>
<p>Phylogenetic analyses (ML) were performed with IQtree software (v.1.6.8) (<xref ref-type="bibr" rid="B76">Nguyen et al., 2015</xref>) [-m TEST: LG + F + G4 Le and Gascuel model (<xref ref-type="bibr" rid="B53">Le and Gascuel, 2008</xref>)] using ultrafast bootstrap (-bb 10,000) (<xref ref-type="bibr" rid="B31">Hoang et al., 2018</xref>) after Clustal Omega alignment (<xref ref-type="bibr" rid="B58">Madeira et al., 2019</xref>). We followed the recommendation of <xref ref-type="bibr" rid="B101">Wu et al. (2020)</xref> to identify Cas1 subtypes using 93 Cas1 reference sequences in the phylogenetic analyses. Additionally, we included casposase genes due to their importance in <italic>cas1</italic> gene evolution (<xref ref-type="bibr" rid="B61">Makarova et al., 2013</xref>; <xref ref-type="bibr" rid="B48">Krupovic et al., 2014</xref>; <xref ref-type="bibr" rid="B46">Krupovic and Koonin, 2016</xref>). The phylogenetic tree was displayed using the iTOL web server (<xref ref-type="bibr" rid="B54">Letunic and Bork, 2019</xref>) with the <italic>Streptomyces coelicolor</italic> transposase gene (NP_626990) as an outgroup to root the tree (<xref ref-type="bibr" rid="B49">Krupovic et al., 2016</xref>; <xref ref-type="bibr" rid="B101">Wu et al., 2020</xref>). Furthermore, the classification of Cas1 CRISPR-Cas subtypes was analyzed in parallel for each Cas1 using CRISPRCasTyper software (<xref ref-type="bibr" rid="B82">Russel et al., 2020</xref>), using mandatory and accessory cas-numerical-score guide typification of the tool. 
We decided to keep the nomenclature system of CRISPRCasTyper, where complex operons are considered hybrid (six <italic>cas</italic> genes from two or more types with a score of at least six and at least one specific <italic>cas</italic>), and not typified operons are labeled as ambiguous (non-hybrid operons with two or more <italic>cas</italic> subtypes and the same scoring) or false (neither hybrid nor ambiguous) (<xref ref-type="bibr" rid="B82">Russel et al., 2020</xref>).</p>
</sec>
<sec id="S2.SS6">
<title>Cas1 protein similarity network and gene neighborhood analysis</title>
<p>The Cas1 similarity network analysis was performed as described by <xref ref-type="bibr" rid="B15">Cardenas et al. (2016)</xref>. Briefly, the set of 2,150 Cas1 sequences was clustered using CD-HIT software (<xref ref-type="bibr" rid="B25">Fu et al., 2012</xref>) with the parameters outlined for Cas1 clustering (<xref ref-type="bibr" rid="B59">Makarova et al., 2011</xref>) (i.e., 90% identity over 75% coverage). The resulting 1,468 representative Cas1 genes were analyzed with BLASTp-all-against-all (default parameters) (<xref ref-type="bibr" rid="B3">Altschul et al., 1990</xref>) using an <italic>E</italic>-value of 10<sup>&#x2013;35</sup>. Finally, the pairwise bit score was used as the distance for network visualization in Cytoscape 3.9.1 (<xref ref-type="bibr" rid="B84">Shannon et al., 2003</xref>) using the <italic>organic</italic> layout. The set of reported contig sequences containing previously identified <italic>cas1</italic> was used to analyze the neighborhood (<xref ref-type="bibr" rid="B74">Moya-Beltr&#x00E1;n et al., 2021</xref>). Briefly, up to 10 ORFs upstream and/or downstream of <italic>cas1</italic> were recovered and their annotations were retrieved using a GFF file. It should be noted that differences in sequencing quality between the metagenomes used in this study (<xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 4</xref>) might have affected assembly and therefore the <italic>cas1</italic> genes and vicinity that could be recovered in some samples. Gene products were clustered at a similarity threshold of 0.5 and coverage threshold of 0.33 to obtain representative sequences using MMseqs2 (<xref ref-type="bibr" rid="B89">Steinegger and S&#x00F6;ding, 2017</xref>). Putative functional assignment of protein clusters was done as described in <xref ref-type="bibr" rid="B75">Moya-Beltr&#x00E1;n et al. (2019)</xref>. 
Results are summarized in <xref ref-type="supplementary-material" rid="TS2">Supplementary Table 2</xref>.</p>
</sec>
<sec id="S2.SS7">
<title>Putative casposase analyses</title>
<p>The 174 <italic>cas1</italic> sequences without other <italic>cas</italic>-encoding genes in the vicinity (&#x00B1;10 ORFs, <xref ref-type="fig" rid="F2">Figure 2B</xref>) were deemed putative casposase genes. According to described casposons (<xref ref-type="bibr" rid="B48">Krupovic et al., 2014</xref>, <xref ref-type="bibr" rid="B47">2017</xref>), we decided to include <italic>cas1</italic> genes with at least seven genes in the contig and <italic>cas1</italic> not situated at the end of the contig. Seven <italic>cas1</italic> sequences fulfilled these criteria, all of which clustered with casposase references in the phylogenetic analyses (<xref ref-type="fig" rid="F4">Figure 4</xref>); thus, we also included the remaining three Cas1 sequences of the casposase reference clade (<xref ref-type="fig" rid="F4">Figure 4</xref>) that had ORFs in their vicinity (Cas1_1015 was located in the casposase reference clade but without ORFs in the vicinity). In order to determine the casposase family affiliation of these sequences, we reproduced the phylogenetic analyses of <xref ref-type="bibr" rid="B48">Krupovic et al. (2014</xref>, <xref ref-type="bibr" rid="B49">2016)</xref>. EasyFig v.2.1 (<xref ref-type="bibr" rid="B91">Sullivan et al., 2011</xref>) was used to compare contigs of candidate sequences, for which TIRs were searched using TirVish (<xref ref-type="bibr" rid="B27">Gremme et al., 2013</xref>). Available TIRs belonging to described casposons (<xref ref-type="bibr" rid="B48">Krupovic et al., 2014</xref>) were also used as queries for identification of TIRs in putative casposase contigs of this study, including the reverse-complement strands.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p>Non-metric multidimensional scaling of beta diversity (Bray&#x2013;Curtis index) showing dissimilarity of the <bold>(A)</bold> 16S rRNA gene and <bold>(B)</bold> Cas1 genes from 48 metagenomes. Points are colored according to Universal Transverse Mercator coordinates (UTM) and country (right legend).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1069452-g003.tif"/>
</fig>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p>Maximum-likelihood phylogenetic tree of 2,150 Cas1 proteins from 48 global hot spring metagenomes. Domain and phylum are indicated as outer and inner rings, respectively, according to the left legend. The remaining taxa over and under 1% of Cas1 sequences are represented by one respective color. Tree clades are colored according to reference Cas1 sequences of a CRISPR-Cas system subtype mentioned by <xref ref-type="bibr" rid="B101">Wu et al. (2020)</xref> or casposase genes used in the phylogenetic analyses. Hot spring tree clades without reference are labeled as NR (no reference). Branch color indicates ultrafast bootstrap values (10,000 repetitions) as a percentage, over 70% in all cases. The tree was rooted using the <italic>Streptomyces coelicolor</italic> transposase gene (NP_626990).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1069452-g004.tif"/>
</fig>
</sec>
</sec>
<sec id="S3" sec-type="results">
<title>Results</title>
<sec id="S3.SS1">
<title>Hot spring sample characterization</title>
<p>Forty-eight metagenomic data sets from 20 hot springs of circumneutral pH, with temperatures in the mesothermophilic to thermophilic range, were recovered from public databases or generated herein (<xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>) to search for global and local patterns of Cas1 diversity in thermophilic environments. The 48 metagenomic data sets represent nine countries in America and Asia (<xref ref-type="fig" rid="F1">Figure 1</xref>). The El Tatio samples submitted here (15 metagenomic data sets), along with those from northern Patagonia (Porcelana, Chile; 3 metagenomic data sets) and Antarctica (Kroner; 1 metagenome), are the only ones from the southern hemisphere within the target physicochemical range (<xref ref-type="fig" rid="F1">Figure 1</xref>). El Tatio sampling encompassed the 10 km<sup>2</sup> area of the high altitude (&#x223C;4,200 MAMSL) geothermal field (upper, middle, and lower geyser basin, <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 2</xref>) in the Atacama Desert (Chile) (<xref ref-type="bibr" rid="B26">Glennon and Pfaff, 2003</xref>). The 48 samples comprise 36 different temperatures ranging from 41.65&#x00B0;C (Jinata Onsen, Japan) to 82&#x00B0;C (El Tatio, Chile), with an average of 57.9&#x00B0;C and a mode and median of 55&#x00B0;C. An average pH of 7.44 with a mode of 8 and a median of 7.51 was observed. The El Tatio geothermal field recorded the highest (pH 9.72; Pto13) and lowest (pH 6; T82) pH (<xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>). Over 58% of the samples were recovered from microbial mats, while 20% were from sediments and 20% were from water fractions. No information was found on the DNA source for two samples (San Vicente hot spring from Colombia and Mammoth, Liberty Cap Streamers, USA) (<xref ref-type="supplementary-material" rid="TS1">Supplementary Table 1</xref>). 
Extending the available hot spring metagenomes to the southern hemisphere balances database samples and provides new taxonomic variants for surveys of thermophilic microorganisms.</p>
</sec>
<sec id="S3.SS2">
<title>Frequent hot spring taxa-affiliated Cas1 abound in thermophilic metagenomes</title>
<p>Of the 2,150 candidate Cas1 sequences finally recovered from 48 metagenomic data sets, 912 (42.4%) were mapped to MAGs (654 MAGs). The remaining 1,238 (57.5%) Cas1 were taxonomically classified <italic>via</italic> BLASTp against the NCBI nr database (<xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>). Approximately 92% of the assignments belonged to the <italic>Bacteria</italic> domain, and 7% to <italic>Archaea</italic>, and were affiliated with 52 phyla and 97 classes. The phylum Chloroflexota accounted for approximately 18% of the total recovered Cas1, represented mainly by classes Chloroflexia (&#x2248;8%) and Anaerolineae (&#x2248;6%) (<xref ref-type="fig" rid="F2">Figure 2A</xref>). The phylum Proteobacteria (&#x2248;13%) was mainly represented by Alphaproteobacteria and Gammaproteobacteria-affiliated Cas1 sequences. In contrast, the Bacteroidota phylum (&#x2248;13%) was represented by three main classes (Bacteroidia, Ignavibacteria, and Kapabacteria) (<xref ref-type="fig" rid="F2">Figure 2A</xref>). Thirty-five phyla and 76 classes were found below a Cas1 frequency of 1% (<xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>). The prevalence of Cas1 in reportedly abundant bacterial hot spring taxa corroborates the importance of CRISPR-Cas systems in hot springs and points to these environments as models to study the environmental microbiology of CRISPR-Cas and their alternative functions (<xref ref-type="bibr" rid="B73">Mohanraju et al., 2022</xref>).</p>
</sec>
<sec id="S3.SS3">
<title>High frequency of class 1-type I Cas1 proteins in hot springs</title>
<p>Consolidated typification using phylogeny, CRISPRCasTyper, and vicinity approaches (<xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>) allowed us to assign most Cas1 (1,403 of 2,150, 65.2%) to a CRISPR-Cas type/subtype. An overall predominance of class 1-type I (1,042, 48.4%) was observed (<xref ref-type="fig" rid="F2">Figure 2B</xref>), whereas class 1-type III was the second most represented type, followed by class 2-type II (<xref ref-type="fig" rid="F2">Figure 2B</xref>). Thirty-eight Cas1 sequences were classified in type I-type III hybrid operons. No class 1-type IV or class 2-type VI systems were found. For the 174 (8%) Cas1 without <italic>cas</italic> in the neighborhood (<xref ref-type="fig" rid="F2">Figure 2B</xref>), further analysis was performed to elucidate their relevance as Cas1 not belonging to a <italic>bona fide</italic> CRISPR-Cas locus, as casposase proteins or <italic>cas1</italic>-solo (non-casposase <italic>cas1</italic> without <italic>cas</italic> genes in the neighborhood) (see below). The Cas1 type frequencies observed in hot springs corroborate the predominance of class 1 CRISPR-Cas systems, suggesting that temperature may not be the main driver of CRISPR system types in nature.</p>
</sec>
<sec id="S3.SS4">
<title>Geographical location as the main driver of <italic>cas1</italic> variance</title>
<p>16S rRNA and <italic>cas1</italic> gene occurrence and abundance <italic>per</italic> metagenome were used to calculate the Bray&#x2013;Curtis dissimilarity indexes. At the 16S rRNA gene level (<xref ref-type="fig" rid="F3">Figure 3A</xref>), hot springs belonging to the same UTM zone were more similar than those from different UTM zones. However, samples of intermediate temperature (&#x003E;55&#x00B0;C and &#x003C;68&#x00B0;C) from different UTM zones were more similar than samples at the upper and lower end of the temperature range (<xref ref-type="fig" rid="F3">Figure 3A</xref>). This may indicate the existence of a higher gene flow between hot springs in the middle-temperature range (55&#x2013;68&#x00B0;C), regardless of their geographic origin. In contrast, temperatures at the upper and lower extremes could restrict gene flow even between nearby geographic zones. Although no significant diversity differences were observed between the <italic>cas1</italic> and 16S rRNA according to the positive correlation (0.57, 0.0001) found by the Mantel test (<xref ref-type="supplementary-material" rid="TS5">Supplementary Table 5</xref>), the <italic>cas1</italic> data (<xref ref-type="fig" rid="F3">Figure 3B</xref>) show that samples were geographically more structured by UTM zone than the results observed for 16S rRNA. The PERMANOVA of Bray&#x2013;Curtis distances indicates that geographical location explained 47 and 42% of the observed variance in the Cas1 and 16S rRNA, respectively (<xref ref-type="supplementary-material" rid="TS5">Supplementary Table 5</xref>). This result is consistent with the nMDS, where the influence of geographic location is more evident at the Cas1 level than at the 16S rRNA level. Other variables, such as temperature, pH, or altitude, scarcely contributed to the variance. 
Moreover, an even higher sequential effect of geographical location was observed when the sources of variation were added according to the marginal PERMANOVA (<xref ref-type="supplementary-material" rid="TS5">Supplementary Table 5</xref>). All these results support the existence of dispersal barriers in hot springs.</p>
<p>To corroborate the relevance of geographical location, we looked for environmental variables affecting the genetic diversity of Cas1. We analyzed Cas1 at the sequence level using a genetic distance matrix (percent identity). The marginal PERMANOVA with distance matrix values revealed that all sources of variation considered here poorly explained the differences at the sequence level (<xref ref-type="supplementary-material" rid="TS5">Supplementary Table 5</xref>), suggesting that Cas1 critical structure is very conserved in hot springs, at least in the canonical protein size used here.</p>
</sec>
<sec id="S3.SS5">
<title>Novel subclades of hot spring Cas1</title>
<p>A rooted phylogenetic tree was constructed with the 2,150 sequences of Cas1 proteins from the 48 globally distributed hot springs (<xref ref-type="fig" rid="F4">Figure 4</xref>), along with 93 Cas1 reference sequences that helped typification (<xref ref-type="bibr" rid="B101">Wu et al., 2020</xref>). Most hot spring Cas1 sequences clustered with the reference sequences, showing that hot spring Cas1 are linked to CRISPR-Cas systems of diverse known subtypes (<xref ref-type="fig" rid="F4">Figure 4</xref>). However, 19 clades without reference sequences were also obtained, harboring poorly classified or unclassified Cas1 sequences (labeled NR, i.e., No Reference). Except for subclade NR16, NR clades were usually positioned next to the root of the tree or the root of internal reference Cas1 clades (<xref ref-type="fig" rid="F4">Figure 4</xref>, NR marked in gray). NR clades and their position indicate that some Cas1 from hot springs are infrequent in databases or completely new, and also suggest that Cas1 sequences close to casposase clades may be related to casposons.</p>
<p>Overlayed taxonomic affiliation showed that 52 microbial phyla were represented at least once in the tree (<xref ref-type="fig" rid="F4">Figure 4</xref>, outer and inner rings). Cas1 from the same bacterial phyla frequently clustered together, and below the phylum level, no clear associations were recovered (data not shown). However, Cas1 proteins from predominant phyla such as <italic>Chloroflexota</italic> (396 sequences) and <italic>Proteobacteria</italic> (271 sequences) were distributed in several clades within the tree (<xref ref-type="fig" rid="F4">Figure 4</xref>). Most of the 90 Cas1 sequences from members of the phylum <italic>Cyanobacteria</italic> were clustered into clades recognized as I-D and III-B (<xref ref-type="fig" rid="F4">Figure 4</xref>). Metadata overlay on the tree (e.g., temperature) was of poor value for revealing specific data patterns (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 5</xref>), except for Cas1 clades I-A and I-B belonging to <italic>Archaea</italic>, which were associated with slightly warmer temperatures. Regarding the taxonomy of NR subclades (358 total sequences), most belonged to <italic>Chloroflexota</italic> (124, 35%) and <italic>Proteobacteria</italic> (39, 11%), whereas taxonomically unassigned (phylum-level) Cas1 sequences ranked third (34, 9.5%), which could suggest new Cas1 homologs, especially for those deep clades. In general, phyla that harbored most of the Cas1 sequences (<xref ref-type="fig" rid="F2">Figure 2</xref>) also harbored most of the NR members (over 2%, <xref ref-type="fig" rid="F5">Figure 5</xref>), indicating that predominant hot spring taxa could harbor rare Cas1 homologs.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption><p>Similarity network of 1,468 representative Cas1 proteins obtained from 2,150 Cas1 from 48 hot spring metagenomes. Points are colored according to the clades of the phylogenetic tree in <xref ref-type="fig" rid="F4">Figure 4</xref>. Specific consolidated typification of NR (no reference) Cas1 clades is indicated in boxes, where the nomenclature of &#x201C;ambiguous&#x201D; or &#x201C;false&#x201D; was taken from <xref ref-type="bibr" rid="B82">Russel et al. (2020)</xref>, meaning non-hybrid operons with two or more <italic>cas</italic> subtypes and the same scoring, or neither hybrid nor ambiguous operons, respectively. NR5 is represented by one sequence and as a singleton in the network (not shown).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1069452-g005.tif"/>
</fig>
<p>To further explore the nature and characteristics of the Cas1 NR subclades, we constructed a similarity network using the 1,468 representative Cas1 proteins of the aforementioned phylogenetic tree (<xref ref-type="fig" rid="F4">Figure 4</xref>). Each node of the network (<xref ref-type="fig" rid="F5">Figure 5</xref>) represents a Cas1 protein, and edges correspond to the bit score of an all-versus-all BLASTp analysis. The largest network module contains three principal regions conforming a core, where (1) most I-A, II-B, III-C-1, and Archaeal-II Cas1 sequences are separated from the region composed of (2) I-C, and (3) I-B, III-C-2, and III-D subtype proteins. Some nodes are separated by long edges and arranged as &#x201C;satellites&#x201D;: Casposase2, CasX, and some Cas1 of subclade V-A (<xref ref-type="fig" rid="F5">Figure 5</xref>), which indicates unusual Cas1 homologs. The second-largest module harbors Cas1 of subtypes II-A-II-C and III-A, together with &#x201C;satellite&#x201D; sequences (NR7 and NR8, <xref ref-type="fig" rid="F5">Figure 5</xref>). The third module is composed exclusively of I-E sequences, which in <xref ref-type="fig" rid="F4">Figure 4</xref> also clustered apart with I-F representatives (a minor yet separate module in the network; <xref ref-type="fig" rid="F5">Figure 5</xref>). Finally, Cas1 of the rare subtype CasY formed an isolated module. Several small NR modules are isolated from the rest (NR1&#x2013;NR6). However, the biggest NR clade of <xref ref-type="fig" rid="F4">Figure 4</xref> (NR16) was inside the main network module (<xref ref-type="fig" rid="F5">Figure 5</xref>), suggesting new varieties of Cas1 similar to traditional Cas1 sequences.</p>
<p>Most NR Cas1 clades of the phylogenetic tree were classified as false or ambiguous (210, 9.7%), but several sequences were also affiliated with a CRISPR-Cas system type/subtype (<xref ref-type="fig" rid="F5">Figure 5</xref> and <xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>). According to the network arrangement, the main module harbors 252 sequences of subclades NR11&#x2013;NR19, where 146 (58%) are classified as false or ambiguous, and 106 (42%) effectively belong to a known CRISPR-Cas system (<xref ref-type="fig" rid="F5">Figure 5</xref> and <xref ref-type="supplementary-material" rid="TS6">Supplementary Table 6</xref>). Interestingly, modules NR7, NR8, and NR9 only include sequences classified as type II Cas1 or false. The NR10 cluster is predominantly composed of type I-D Cas1 sequences and is displayed as a &#x201C;satellite&#x201D; of the main cluster, which is also observed for NR7, NR8, and Cas1 sequences of the Casposase2 clade (<xref ref-type="fig" rid="F5">Figure 5</xref>). These results show that Cas1 from hot springs harbor rare sequence variants. Deeper analyses of the &#x201C;satellite&#x201D; casposase clade allowed us to identify new Cas1 unrelated to CRISPR-Cas immunity.</p>
</sec>
<sec id="S3.SS6">
<title>New casposase genes from hot springs</title>
<p>When the casposase phylogenetic analyses of <xref ref-type="bibr" rid="B48">Krupovic et al. (2014</xref>, <xref ref-type="bibr" rid="B49">2016)</xref> were reproduced, six of the 10 Cas1-solo from hot springs were located in a monophyletic clade in the outgroup, close to Cas1 subtype I-A (<xref ref-type="fig" rid="F6">Figure 6</xref>), representing a novel branch of casposases. We propose that this clade corresponds to a new family of casposases (family 5) discovered in hot springs. Other hot spring casposases grouped with already known families (<xref ref-type="fig" rid="F6">Figure 6</xref>). New family 5 and Cas1 proteins belonging to known casposase families show great neighborhood genetic diversity (<xref ref-type="fig" rid="F7">Figure 7</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 6</xref>). Identity comparison also revealed great variation, where only contigs harboring Cas1_938 (Miyagi, Japan) and Cas1_1200 (Washburn, USA) presented greater similarity (<xref ref-type="fig" rid="F7">Figure 7</xref>). The same was observed for the gene content of the contig, where the most distant contig (Cas1_1244) is the only one harboring DNApol B. TIRs were identified in five contigs (Cas1_18, Cas1_938, Cas1_1200, Cas1_1226, and Cas1_1269). For sequence Cas1_1226, the TIR was identified by alignment with the TIR of <italic>Candidatus</italic> &#x201C;Acetothermum autotrophicum&#x201D; (<xref ref-type="bibr" rid="B48">Krupovic et al., 2014</xref>). These results reveal the remarkable variation of genetic content inside family 5; however, casposases of this family show a high identity between them compared to other families (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 7</xref>). The genetic neighborhoods of the remaining non-family 5 putative casposase gene contigs are available in <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 6</xref>.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption><p>Reconstructed maximum likelihood phylogenetic tree of <xref ref-type="bibr" rid="B48">Krupovic et al. (2014</xref>, <xref ref-type="bibr" rid="B49">2016)</xref>. Ten hot spring casposases (colored red and with ID indicated in <xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>) were analyzed along with 110 reference casposases. Clades are colored according to the described casposase family (1&#x2013;4) or outgroup, except family 5, which is indicated inside the outgroup in red, close to Cas1 of subtype I-A CRISPR-Cas system. Bootstrap values are indicated in branches as a percentage.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1069452-g006.tif"/>
</fig>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption><p>Genetic neighborhood comparison of family 5 casposase genes from hot springs. Casposase gene ID is indicated at the beginning of the contig, and the BLASTn pairwise percentage is indicated according to the color shade. The legend indicates casposases (yellow), hypothetical proteins (aqua green), and terminal inverted repeats (black). Hypothetical proteins with numbers inside refer to the same protein sequence (another hypothetical protein number 4 can be found in <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 6</xref>, Cas1_18 of family 3 casposases). TIRs could not be identified in Cas1_1219 and Cas1_1244.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-13-1069452-g007.tif"/>
</fig>
</sec>
</sec>
<sec id="S4" sec-type="discussion">
<title>Discussion</title>
<sec id="S4.SS1">
<title>Expanding the <italic>cas1</italic> gene information to the southern hemisphere</title>
<p>The metagenomes used in this study included new metagenomes from the El Tatio geothermal field in Chile, expanding the metagenomic data available for hot springs from underrepresented geographical zones. In addition, this study contributed to the expansion of the known Cas1 sequences from the southern hemisphere, including Antarctica (<xref ref-type="fig" rid="F1">Figure 1</xref>). The global and regional distribution of the metagenomic data used allowed us to compare the diversity of 16S rRNA and Cas1 on a continental scale, but also locally, as in the case of the El Tatio geyser field. The ranges of temperature (41&#x2013;80&#x00B0;C) and pH (approximately 6&#x2013;8) used for this study have been defined as suitable for thermophilic microorganisms (<xref ref-type="bibr" rid="B102">Zablocki et al., 2018</xref>; <xref ref-type="bibr" rid="B70">Merino et al., 2019</xref>) and reveal that metagenomic samples in the mesothermophilic range of temperature (approximately 55&#x2013;65&#x00B0;C) are globally similar, corroborating that temperature and pH are the main drivers of microbial diversity in hot springs (<xref ref-type="bibr" rid="B67">Massello et al., 2020</xref>). Nevertheless, the local adaptation revealed by some Cas1 suggests the presence of dispersal barriers to gene flow which may be associated with variables not quantified in this study (<xref ref-type="fig" rid="F3">Figure 3B</xref>).</p>
<p>The metagenomic data sets used here corroborate the presence of previously described phyla abundant in hot springs (<xref ref-type="bibr" rid="B40">Klatt et al., 2011</xref>, <xref ref-type="bibr" rid="B39">2013</xref>; <xref ref-type="bibr" rid="B33">Inskeep et al., 2013</xref>; <xref ref-type="bibr" rid="B57">L&#x00F3;pez-L&#x00F3;pez et al., 2013</xref>; <xref ref-type="bibr" rid="B10">Bolhuis et al., 2014</xref>; <xref ref-type="bibr" rid="B85">Sharp et al., 2014</xref>; <xref ref-type="bibr" rid="B90">Strazzulli et al., 2017</xref>). Most hot spring metagenomes share a small core of highly represented taxa, such as the phylum <italic>Chloroflexota</italic>, and several minor phyla (<xref ref-type="fig" rid="F2">Figure 2</xref> and <xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 3</xref>). However, the species turnover evidenced by the 16S rRNA gene Bray&#x2013;Curtis dissimilarity index, was less than the turnover for the <italic>cas1</italic> gene (<xref ref-type="fig" rid="F3">Figure 3</xref>), which was more geographically structured. This is consistent with the degree of isolation and local evolution previously suggested for thermal environments and some phyla, such as thermophilic <italic>Cyanobacteria</italic> (<xref ref-type="bibr" rid="B24">Finsinger et al., 2008</xref>; <xref ref-type="bibr" rid="B50">Kunin et al., 2008</xref>; <xref ref-type="bibr" rid="B35">Ionescu et al., 2010</xref>). There are some exceptions, such as the case of Tattapani, which showed high dissimilarity for the 16S rRNA gene, but not for <italic>cas1</italic> (<xref ref-type="fig" rid="F3">Figure 3</xref>). One possible explanation could be a local adaptation of CRISPR-Cas systems to similar conditions and, for example, confronting a similar viral community. Meanwhile, other samples, such as from Gongxiaoshe, showed high similarity in the 16S rRNA gene but great dissimilarity with <italic>cas1</italic> (<xref ref-type="fig" rid="F3">Figure 3</xref>). 
This could be due to the presence of similar bacterial communities versus locally different viral communities, which would support differential adaptation of the host adaptation module revealed by <italic>cas1</italic>. We hypothesize that Cas1 may expose local adaptations due to the specificity of virus-host relationships (<xref ref-type="bibr" rid="B102">Zablocki et al., 2018</xref>) mediated by CRISPR-Cas systems in hot springs. This hypothesis is consistent with the argued rapid evolution observed in extreme environments (<xref ref-type="bibr" rid="B56">Li et al., 2014</xref>) and the specificity of prokaryotic genes (<xref ref-type="bibr" rid="B19">Coelho et al., 2022</xref>), which could also be necessary for thermophilic viral communities. <italic>Cas1</italic> beta-diversity variations can also suggest that the same host could have existed at the beginning of several hot spring communities, where today viruses may help reveal evolutionary changes in these host communities.</p>
</sec>
<sec id="S4.SS2">
<title>High <italic>cas1</italic> abundance in prevalent hot spring phyla</title>
<p>Abundances of <italic>cas1</italic> and 16S rRNA genes showed similarities in each sample regardless of hot spring temperature (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 3</xref>), especially for predominant phyla such as <italic>Chloroflexota, Proteobacteria, Cyanobacteria</italic>, and <italic>Bacteroidota</italic>, which highlights the role of CRISPR-Cas systems. Cas1 protein diversity analyses at the community level are scarce, and few surveys consider predominant hot springs taxa and CRISPR-Cas types, particularly for <italic>Chloroflexota</italic> and <italic>Bacteroidota</italic> phyla. <xref ref-type="bibr" rid="B101">Wu et al. (2020)</xref> observed that <italic>cas1</italic> prevalence in soil samples increased with temperature in some taxa (including <italic>Chloroflexota</italic> and <italic>Deltaproteobacteria</italic>) while <italic>rplB</italic> abundance decreased. Differences here could be explained due to the stability of hot springs versus temperature as altering the microbiostasis of soil. However, it should be noted that the low sequencing depth of some samples used here (M46_SRR2625865, M46_SRR2626160, M61_SRR3961741, M50_ERR1543536, M4564_SRR6941191, and M65_MR4530144) could overestimate diversities found. In any case, the maintenance or removal of low-coverage samples does not alter the statistical significance of the positive correlation between 16S rRNA and <italic>cas1</italic> genes (<xref ref-type="supplementary-material" rid="TS5">Supplementary Table 5</xref>). As suggested (<xref ref-type="bibr" rid="B97">Weinberger et al., 2012</xref>; <xref ref-type="bibr" rid="B36">Iranzo et al., 2013</xref>), temperature changes could determine differential fitness for CRISPR-Cas systems (affecting the viral diversity/density of specific taxa) in the long term, which may explain the correlation observed in soil. 
A high prevalence of Cas1 in major taxa from hot springs can reveal a community where predominant species, mainly phototrophic, could be competition and adaptive defense specialists due to low viral diversity/density. CRISPR-Cas in predominant taxa from hot springs, especially in phototrophic species (<italic>Chloroflexota</italic> and <italic>Cyanobacteria</italic> phyla), could maintain the steady state of the system, ensuring the inflow of energy.</p>
</sec>
<sec id="S4.SS3">
<title>The novelty of the <italic>cas1</italic> gene in hot springs</title>
<p>Reference sequence-guided typing (<xref ref-type="bibr" rid="B101">Wu et al., 2020</xref>) of previously described Cas1 sequences helped identify the CRISPR-Cas system category likely to be most relevant in these thermal environments. However, this approach has limitations because the <italic>in silico</italic> classification of CRISPR-Cas into a specific type/subtype must be based not only on phylogeny but also on the sequence similarity, genetic vicinity, domains, and catalytic residues (<xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>). For this reason, we further employed CRISPRCasTyper software (<xref ref-type="bibr" rid="B82">Russel et al., 2020</xref>), to retrieve information in the contigs harboring <italic>cas1</italic> that showed type I and III CRISPR-Cas as predominant in hot springs (<xref ref-type="fig" rid="F2">Figure 2</xref>), as well as in databases (<xref ref-type="bibr" rid="B43">Koonin et al., 2017</xref>; <xref ref-type="bibr" rid="B20">Crawley et al., 2018</xref>). Type I and III systems accounted for 48 and 8.2%, respectively, of the Cas1 obtained herein, which is below the total distribution of CRISPR-Cas systems reported in nature (<xref ref-type="bibr" rid="B20">Crawley et al., 2018</xref>). Despite type IV systems having been previously found in hyper/thermophiles (<xref ref-type="bibr" rid="B38">Jung et al., 2016</xref>; <xref ref-type="bibr" rid="B92">Taylor et al., 2019</xref>; <xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>), we did not detect type IV Cas1 genes in the analyzed metagenomes. This might be due to the almost total absence described for the adaptation module in type IV CRISPR-Cas systems (<xref ref-type="bibr" rid="B80">Pinilla-Redondo et al., 2020</xref>), being overlooked here due to the methodological approach used. 
Conversely, the fact that class 1 systems are predominant in both thermophilic and mesophilic environments suggests that selective pressure is not related to temperature and mutation rates, but to specific mechanistic properties (<xref ref-type="bibr" rid="B87">Shmakov et al., 2017b</xref>). In this sense, class 1 systems could be more versatile in performing several functions than class 2 systems or in escaping anti-immunity mechanisms, for example, evolving just one subunit targeted by anti-CRISPR. Regarding class 2, 146 (6.7%) of Cas1 were classified as type II and V (<xref ref-type="fig" rid="F2">Figure 2</xref>). Most of the Cas1 classified as type V belong to rare phyla such as <italic>Patescibacteria</italic> or unclassified bacteria (<xref ref-type="fig" rid="F2">Figure 2</xref>), which encourages the search for new variants of this type already described in thermophiles (<xref ref-type="bibr" rid="B17">Chen et al., 2019</xref>; <xref ref-type="bibr" rid="B93">Tian et al., 2020</xref>). The absence of type VI Cas1 could be expected because these CRISPR-Cas systems target RNA (<xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>), maybe pointing to scarce foreign RNA entering the cell. However, as mentioned for type IV systems, <italic>cas1</italic> genes associated with type VI systems have only been described in a few subtypes (<xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>). Furthermore, described bacterial species that harbor type VI systems are mesophilic and related to humans or pets, which suggests a low prevalence of this type in hot springs.</p>
<p>Several Cas1 sequences could not be classified to a described system type/subtype despite being together with other <italic>cas</italic> genes (<xref ref-type="fig" rid="F2">Figure 2</xref>). Given the genetic context, phylogeny, and network data, we speculate that some of these Cas are part of novel CRISPR-Cas systems. It should be noted that our approach was based on the tree topology, <italic>cas</italic> locus, and CRISPR array [using CRISPRCasTyper (<xref ref-type="bibr" rid="B82">Russel et al., 2020</xref>)]; however, missing contig information could hinder its assignment to a CRISPR-Cas system. Nevertheless, Cas2 and Cas4 were the most frequent <italic>cas</italic> genes in NR Cas1 clades. Regarding non-<italic>cas</italic> genes, the prevalent ones encoded hypothetical proteins not found in the CDD database (426 ORFs), RNase H-like proteins (26 ORFs), and DUF697 or DUF370 domain-containing proteins. The fact that most gene neighborhoods are related to DNA metabolism corroborates the evidence of non-canonical functions of CRISPR-Cas systems and Cas1 (<xref ref-type="bibr" rid="B83">Sampson and Weiss, 2013</xref>; <xref ref-type="bibr" rid="B45">Krishnan et al., 2020</xref>; <xref ref-type="bibr" rid="B73">Mohanraju et al., 2022</xref>). Ongoing work will allow us to eventually characterize new molecular systems involving Cas proteins, which exceeds the current objectives of this work.</p>
<p>Our phylogenetic reconstruction is in agreement with the topology retrieved by <xref ref-type="bibr" rid="B101">Wu et al. (2020)</xref>, but also includes several clades without reference sequences (NR), located at the root of the tree and in several internal clades (<xref ref-type="fig" rid="F4">Figure 4</xref>). The phylum <italic>Chloroflexota</italic> harbors most of the Cas1 NR clades, which may be explained by the dominance of diverse members of this phylum in hot springs. The <italic>Chloroflexota</italic> Cas1 sequences were distributed in tree clades II-B, NR16, NR18, and I-U (<xref ref-type="fig" rid="F4">Figure 4</xref> and <xref ref-type="supplementary-material" rid="TS6">Supplementary Table 6</xref>), suggesting great diversity of adaptive immunity in this phylum. According to the data (<xref ref-type="bibr" rid="B59">Makarova et al., 2011</xref>, <xref ref-type="bibr" rid="B61">2013</xref>, <xref ref-type="bibr" rid="B62">2020a</xref>; <xref ref-type="bibr" rid="B13">Burstein et al., 2016</xref>), hot springs Cas1 from <italic>Chloroflexota</italic> includes types I and III, with a predominance of type I-A. However, in most cases, members that group in the tree with type II-B reference sequences (WP080019870 and WP011139432) belong to I-B (<xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>), suggesting horizontal transfer of Cas1. The expected scenario was observed for <italic>Cyanobacteria</italic>, where sequences were located as described in I-E (<xref ref-type="bibr" rid="B61">Makarova et al., 2013</xref>; <xref ref-type="bibr" rid="B13">Burstein et al., 2016</xref>), III-B (<xref ref-type="bibr" rid="B61">Makarova et al., 2013</xref>, <xref ref-type="bibr" rid="B62">2020a</xref>; <xref ref-type="bibr" rid="B13">Burstein et al., 2016</xref>), and the almost phylum-exclusive I-D (<xref ref-type="bibr" rid="B14">Cai et al., 2013</xref>). 
Our results maintain the absence of type II for <italic>Cyanobacteria</italic> and <italic>Chloroflexota</italic> (<xref ref-type="bibr" rid="B62">Makarova et al., 2020a</xref>; <xref ref-type="fig" rid="F4">Figure 4</xref>), with the majority of Cas1 of this type found in <italic>Proteobacteria</italic> and <italic>Bacteroidota</italic>, more represented by non-photosynthetic taxa (<xref ref-type="fig" rid="F2">Figure 2B</xref> and <xref ref-type="supplementary-material" rid="TS3">Supplementary Table 3</xref>), suggesting a relationship between photosynthesis and the scarcity of the Cas9 protein. Overall, it is difficult to describe the diversity of Cas1 inside these hot spring-predominant taxa using taxonomy (despite the exclusiveness of some subtypes, e.g., I-D). As mentioned for the CRISPR-Cas system (<xref ref-type="bibr" rid="B60">Makarova et al., 2015</xref>, <xref ref-type="bibr" rid="B62">2020a</xref>), the presence of the same type/subtype in several taxa could suggest no specific function of Cas1 in a host. Furthermore, the fact that hot springs did not reveal a new association regarding phylum-Cas1 type descriptions suggests that temperature has a minor selective effect for the CRISPR-Cas type/subtype. High horizontal gene transfer and viral infection events regulating CRISPR-Cas systems (<xref ref-type="bibr" rid="B60">Makarova et al., 2015</xref>; <xref ref-type="bibr" rid="B43">Koonin et al., 2017</xref>; <xref ref-type="bibr" rid="B51">Landsberger et al., 2018</xref>) could also blur the evolutionary history of Cas1 in hot springs, a hypothesis framed in the &#x201C;guns for hire&#x201D; model (<xref ref-type="bibr" rid="B44">Koonin et al., 2020</xref>). 
However, the case of I-D in <italic>Cyanobacteria</italic> points to a particular virus infecting them or specific function not necessarily related to adaptive immunity (<xref ref-type="bibr" rid="B73">Mohanraju et al., 2022</xref>), which could maintain Cas1 as a &#x201C;not for hire&#x201D; gun.</p>
<p>Finally, casposase analysis highlights the novelty of Cas1 from hot springs and confirms the relevance of CRISPR-Cas in these environments. Previous work on hot springs Cas1 diversity defined four casposase gene families and a Cas1-solo outgroup, indicating that those Cas1-solo were probably vestigial genes due to non-conserved catalytic residues (<xref ref-type="bibr" rid="B48">Krupovic et al., 2014</xref>, <xref ref-type="bibr" rid="B49">2016</xref>). With new metagenomic data sets available today, we have identified a new casposase gene group (proposed family 5, <xref ref-type="fig" rid="F6">Figure 6</xref>), the closest in the tree to the Cas1-solo outgroup mentioned by <xref ref-type="bibr" rid="B48">Krupovic et al. (2014</xref>, <xref ref-type="bibr" rid="B49">2016)</xref> and next to subtype I-A Cas1. The CRISPR-Cas subtype I-A has a majority representation in the <italic>Archaea</italic> domain (<xref ref-type="bibr" rid="B60">Makarova et al., 2015</xref>), which is observed in Cas1 subtype I-A of the tree (<xref ref-type="fig" rid="F4">Figure 4</xref>), but also for family 5 casposases, suggesting a vestigial metabolic function of Cas1 related to that domain. Conversely, family 5 shows conserved catalytic site residues (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 7</xref>), suggesting that they could be active enzymes. Nevertheless, casposase Cas1_1244 is the only family member with DNApol B and shows the lowest identity value with the rest of family 5 (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 7</xref>), whose remaining members share over 50% identity. 
Active site residues and the absence of DNApol B in most of family 5, contrary to other casposase families (<xref ref-type="bibr" rid="B48">Krupovic et al., 2014</xref>, <xref ref-type="bibr" rid="B47">2017</xref>), suggest that they are functioning as non-self-replicative transposons, which is in line with the non-relationship of casposons with eukaryotic self-synthesizing transposons (<xref ref-type="bibr" rid="B48">Krupovic et al., 2014</xref>). We speculate that family 5 represents an intermediate stage between casposases from families 1 to 4 and Cas1 of CRISPR-Cas systems, suggesting that inactive Cas1-solo [group 1 of <xref ref-type="bibr" rid="B48">Krupovic et al. (2014)</xref>] and family 5 might represent recent ancestors in the evolution of Cas1. The HTH C-terminal domain identified in family 2 casposase (<xref ref-type="bibr" rid="B29">Hickman and Dyda, 2015</xref>) was not observed in family 5 casposase (data not shown), supporting its position as an ancestor of CRISPR-Cas Cas1. The high diversity of the genetic context of family 5 casposons, composed of poorly conserved hypothetical proteins (<xref ref-type="fig" rid="F7">Figure 7</xref>), as well as their low sequence similarity concerning other families (<xref ref-type="supplementary-material" rid="DS1">Supplementary Figure 7</xref>), also suggests their rapid evolution, perhaps influenced by several potential horizontal gene transfer events. Only <italic>loci</italic> Cas1_938 and Cas1_1200 of family 5 share more than one hypothetical protein, which is intriguing because of their origin from very distant hot springs (Japan and USA, respectively, <xref ref-type="supplementary-material" rid="TS1">Supplementary Tables 1</xref>, <xref ref-type="supplementary-material" rid="TS3">3</xref>). 
New casposases help to shed light on the function and evolution of hot spring casposons, which will also contribute to the study of the evolution of CRISPR-Cas systems, thereby revealing potential new features that would allow for better elucidation of the origin of the system.</p>
<p>The present study extends the knowledge of Cas1 diversity in thermal environments, where ecological diversity was associated with local characteristics according to geographical origin. Phylogeny and network analyses reveal new Cas1 homologs, including a new family of casposons that formally extends the currently known diversity of the gene. This work could contribute to a better understanding of the evolution of CRISPR-Cas systems by describing new variants in new genetic contexts obtained from new hot springs metagenomes. This study also corroborates that hot springs are suitable environments for obtaining novel information on CRISPR-Cas ecology and evolution, and could contribute to understanding the higher prevalence of CRISPR-Cas systems in these environments.</p>
</sec>
</sec>
<sec id="S5" sec-type="data-availability">
<title>Data availability statement</title>
<p>All data generated or analyzed during this study are included in this published article and <xref ref-type="supplementary-material" rid="DS1">Supplementary material</xref>. Metagenomic datasets from El Tatio can be found in the NCBI database under BioProject <ext-link ext-link-type="DDBJ/EMBL/GenBank" xlink:href="PRJNA858297">PRJNA858297</ext-link> (<ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/sra/">https://www.ncbi.nlm.nih.gov/sra/</ext-link>).</p>
</sec>
<sec id="S6">
<title>Author contributions</title>
<p>OS, SG-L, CB, CR, and BD performed the field sampling. OS, SG-L, AM-B, and JT-L performed the <italic>in silico</italic> analyses. RQ, FJMM, and BD contributed significantly to the research design and writing process. OS and BD conceived the study and wrote the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec id="S7" sec-type="funding-information">
<title>Funding</title>
<p>This work was financed in part by FONDECYT regular N&#x00B0; 1190998 (ANID) and Iniciativa de Investigaci&#x00F3;n UnACh 2021-157-Unach. OS and JT-L were supported in part by ANID National Doctoral Scholarship (Beca de Doctorado Nacional ANID) N&#x00B0; 21172022 and 21171048, respectively. SG-L was supported by ANID FONDECYT Postdoctoral N&#x00B0; 3210547. AM-B and RQ were supported by Centro Ciencia &amp; Vida, FB210008, Financiamiento Basal para Centros Cient&#x00ED;ficos y Tecnol&#x00F3;gicos de Excelencia de ANID, and FONDECYT regular N&#x00B0; 1221035 (ANID). FJMM acknowledged research support by the Conselleria d&#x2019;Innovaci&#x00F3;, Universitats, Ci&#x00E8;ncia i Societat Digital from Generalitat Valenciana, research project PROMETEO/2021/057. BD acknowledged the Millennium Institute Center for Genome Regulation, Project ICN2021-044 supported by the ANID Millennium Scientific Initiative (Chile).</p>
</sec>
<ack>
<p>OS thanks Pablo Vergara for his technical help. BD thanks El Tatio local communities Toconce and Caspana.</p>
</ack>
<sec id="S8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="S9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="S10" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmicb.2022.1069452/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmicb.2022.1069452/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.PDF" id="DS1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_1.XLSX" id="TS1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_2.XLS" id="TS2" mimetype="application/vnd.ms-excel" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_3.XLSX" id="TS3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_4.XLSX" id="TS4" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_5.XLSX" id="TS5" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_6.XLSX" id="TS6" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alcorta</surname> <given-names>J.</given-names></name> <name><surname>Alarc&#x00F3;n-Schumacher</surname> <given-names>T.</given-names></name> <name><surname>Salgado</surname> <given-names>O.</given-names></name> <name><surname>D&#x00ED;ez</surname> <given-names>B.</given-names></name></person-group> (<year>2020</year>). <article-title>Taxonomic novelty and distinctive genomic features of hot spring cyanobacteria.</article-title> <source><italic>Front. Genet.</italic></source> <volume>11</volume>:<issue>568223</issue>. <pub-id pub-id-type="doi">10.3389/fgene.2020.568223</pub-id> <pub-id pub-id-type="pmid">33250920</pub-id></citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alcorta</surname> <given-names>J.</given-names></name> <name><surname>Espinoza</surname> <given-names>S.</given-names></name> <name><surname>Viver</surname> <given-names>T.</given-names></name> <name><surname>Alcam&#x00E1;n-Arias</surname> <given-names>M. E.</given-names></name> <name><surname>Trefault</surname> <given-names>N.</given-names></name> <name><surname>Rossell&#x00F3;-M&#x00F3;ra</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>Temperature modulates <italic>Fischerella thermalis</italic> ecotypes in Porcelana hot spring.</article-title> <source><italic>Syst. Appl. Microbiol.</italic></source> <volume>41</volume> <fpage>531</fpage>&#x2013;<lpage>543</lpage>. <pub-id pub-id-type="doi">10.1016/j.syapm.2018.05.006</pub-id> <pub-id pub-id-type="pmid">30041921</pub-id></citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altschul</surname> <given-names>S. F.</given-names></name> <name><surname>Gish</surname> <given-names>W.</given-names></name> <name><surname>Miller</surname> <given-names>W.</given-names></name> <name><surname>Myers</surname> <given-names>E. W.</given-names></name> <name><surname>Lipman</surname> <given-names>D. J.</given-names></name></person-group> (<year>1990</year>). <article-title>Basic local alignment search tool.</article-title> <source><italic>J. Mol. Biol.</italic></source> <volume>215</volume> <fpage>403</fpage>&#x2013;<lpage>410</lpage>. <pub-id pub-id-type="doi">10.1016/S0022-2836(05)80360-2</pub-id></citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Amitai</surname> <given-names>G.</given-names></name> <name><surname>Sorek</surname> <given-names>R.</given-names></name></person-group> (<year>2016</year>). <article-title>CRISPR-Cas adaptation: Insights into the mechanism of action.</article-title> <source><italic>Nat. Rev. Microbiol.</italic></source> <volume>14</volume> <fpage>67</fpage>&#x2013;<lpage>76</lpage>. <pub-id pub-id-type="doi">10.1038/nrmicro.2015.14</pub-id> <pub-id pub-id-type="pmid">26751509</pub-id></citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Andersen</surname> <given-names>K. S.</given-names></name> <name><surname>Kirkegaard</surname> <given-names>R. H.</given-names></name> <name><surname>Karst</surname> <given-names>S. M.</given-names></name> <name><surname>Albertsen</surname> <given-names>M.</given-names></name></person-group> (<year>2018</year>). <article-title>ampvis2: An R package to analyse and visualise 16S rRNA amplicon data.</article-title> <source><italic>bioRxiv</italic></source> [<comment>Preprint</comment>]. <pub-id pub-id-type="doi">10.1101/299537</pub-id></citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Anderson</surname> <given-names>M. J.</given-names></name></person-group> (<year>2001</year>). <article-title>A new method for non-parametric multivariate analysis of variance.</article-title> <source><italic>Austral Ecol.</italic></source> <volume>26</volume> <fpage>32</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1111/j.1442-9993.2001.01070.pp.x</pub-id></citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Anderson</surname> <given-names>R. E.</given-names></name> <name><surname>Brazelton</surname> <given-names>W. J.</given-names></name> <name><surname>Baross</surname> <given-names>J. A.</given-names></name></person-group> (<year>2011</year>). <article-title>Using CRISPRs as a metagenomic tool to identify microbial hosts of a diffuse flow hydrothermal vent viral assemblage.</article-title> <source><italic>FEMS Microbiol. Ecol.</italic></source> <volume>77</volume> <fpage>120</fpage>&#x2013;<lpage>133</lpage>. <pub-id pub-id-type="doi">10.1111/j.1574-6941.2011.01090.x</pub-id> <pub-id pub-id-type="pmid">21410492</pub-id></citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Andersson</surname> <given-names>A. F.</given-names></name> <name><surname>Banfield</surname> <given-names>J. F.</given-names></name></person-group> (<year>2008</year>). <article-title>Virus population dynamics and acquired virus resistance in natural microbial communities.</article-title> <source><italic>Science</italic></source> <volume>320</volume> <fpage>1047</fpage>&#x2013;<lpage>1050</lpage>. <pub-id pub-id-type="doi">10.1126/science.1157358</pub-id> <pub-id pub-id-type="pmid">18497291</pub-id></citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bankevich</surname> <given-names>A.</given-names></name> <name><surname>Nurk</surname> <given-names>S.</given-names></name> <name><surname>Antipov</surname> <given-names>D.</given-names></name> <name><surname>Gurevich</surname> <given-names>A. A.</given-names></name> <name><surname>Dvorkin</surname> <given-names>M.</given-names></name> <name><surname>Kulikov</surname> <given-names>A. S.</given-names></name><etal/></person-group> (<year>2012</year>). <article-title>SPAdes: A new genome assembly algorithm and its applications to single-cell sequencing.</article-title> <source><italic>J. Comput. Biol.</italic></source> <volume>19</volume> <fpage>455</fpage>&#x2013;<lpage>477</lpage>. <pub-id pub-id-type="doi">10.1089/cmb.2012.0021</pub-id> <pub-id pub-id-type="pmid">22506599</pub-id></citation></ref>
<ref id="B10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bolhuis</surname> <given-names>H.</given-names></name> <name><surname>Cretoiu</surname> <given-names>M. S.</given-names></name> <name><surname>Stal</surname> <given-names>L. J.</given-names></name></person-group> (<year>2014</year>). <article-title>Molecular ecology of microbial mats.</article-title> <source><italic>FEMS Microbiol. Ecol.</italic></source> <volume>90</volume> <fpage>335</fpage>&#x2013;<lpage>350</lpage>. <pub-id pub-id-type="doi">10.1111/1574-6941.12408</pub-id> <pub-id pub-id-type="pmid">25109247</pub-id></citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Broniewski</surname> <given-names>J. M.</given-names></name> <name><surname>Meaden</surname> <given-names>S.</given-names></name> <name><surname>Paterson</surname> <given-names>S.</given-names></name> <name><surname>Buckling</surname> <given-names>A.</given-names></name> <name><surname>Westra</surname> <given-names>E. R.</given-names></name></person-group> (<year>2020</year>). <article-title>The effect of phage genetic diversity on bacterial resistance evolution.</article-title> <source><italic>ISME J.</italic></source> <volume>14</volume> <fpage>828</fpage>&#x2013;<lpage>836</lpage>. <pub-id pub-id-type="doi">10.1038/s41396-019-0577-7</pub-id> <pub-id pub-id-type="pmid">31896785</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Burstein</surname> <given-names>D.</given-names></name> <name><surname>Harrington</surname> <given-names>L. B.</given-names></name> <name><surname>Strutt</surname> <given-names>S. C.</given-names></name> <name><surname>Probst</surname> <given-names>A. J.</given-names></name> <name><surname>Anantharaman</surname> <given-names>K.</given-names></name> <name><surname>Thomas</surname> <given-names>B. C.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>New CRISPR-Cas systems from uncultivated microbes.</article-title> <source><italic>Nature</italic></source> <volume>542</volume> <fpage>237</fpage>&#x2013;<lpage>241</lpage>. <pub-id pub-id-type="doi">10.1038/nature21059</pub-id> <pub-id pub-id-type="pmid">28005056</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Burstein</surname> <given-names>D.</given-names></name> <name><surname>Sun</surname> <given-names>C. L.</given-names></name> <name><surname>Brown</surname> <given-names>C. T.</given-names></name> <name><surname>Sharon</surname> <given-names>I.</given-names></name> <name><surname>Anantharaman</surname> <given-names>K.</given-names></name> <name><surname>Probst</surname> <given-names>A. J.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Major bacterial lineages are essentially devoid of CRISPR-Cas viral defence systems.</article-title> <source><italic>Nat. Commun.</italic></source> <volume>7</volume>:<issue>10613</issue>. <pub-id pub-id-type="doi">10.1038/ncomms10613</pub-id> <pub-id pub-id-type="pmid">26837824</pub-id></citation></ref>
<ref id="B14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cai</surname> <given-names>F.</given-names></name> <name><surname>Axen</surname> <given-names>S. D.</given-names></name> <name><surname>Kerfeld</surname> <given-names>C. A.</given-names></name></person-group> (<year>2013</year>). <article-title>Evidence for the widespread distribution of CRISPR-Cas system in the Phylum Cyanobacteria.</article-title> <source><italic>RNA Biol.</italic></source> <volume>10</volume> <fpage>687</fpage>&#x2013;<lpage>693</lpage>. <pub-id pub-id-type="doi">10.4161/rna.24571</pub-id> <pub-id pub-id-type="pmid">23628889</pub-id></citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cardenas</surname> <given-names>J. P.</given-names></name> <name><surname>Quatrini</surname> <given-names>R.</given-names></name> <name><surname>Holmes</surname> <given-names>D. S.</given-names></name></person-group> (<year>2016</year>). <article-title>Aerobic lineage of the oxidative stress response protein rubrerythrin emerged in an ancient microaerobic, (hyper)thermophilic environment.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>7</volume>:<issue>1822</issue>. <pub-id pub-id-type="doi">10.3389/fmicb.2016.01822</pub-id> <pub-id pub-id-type="pmid">27917155</pub-id></citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chaumeil</surname> <given-names>P. A.</given-names></name> <name><surname>Mussig</surname> <given-names>A. J.</given-names></name> <name><surname>Hugenholtz</surname> <given-names>P.</given-names></name> <name><surname>Parks</surname> <given-names>D. H.</given-names></name></person-group> (<year>2020</year>). <article-title>GTDB-Tk: A toolkit to classify genomes with the genome taxonomy database.</article-title> <source><italic>Bioinformatics</italic></source> <volume>36</volume> <fpage>1925</fpage>&#x2013;<lpage>1927</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz848</pub-id> <pub-id pub-id-type="pmid">31730192</pub-id></citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>L. X.</given-names></name> <name><surname>Al-Shayeb</surname> <given-names>B.</given-names></name> <name><surname>M&#x00E9;heust</surname> <given-names>R.</given-names></name> <name><surname>Li</surname> <given-names>W. J.</given-names></name> <name><surname>Doudna</surname> <given-names>J. A.</given-names></name> <name><surname>Banfield</surname> <given-names>J. F.</given-names></name></person-group> (<year>2019</year>). <article-title>Candidate phyla radiation Roizmanbacteria from hot springs have novel and unexpectedly abundant CRISPR-Cas systems.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>10</volume>:<issue>928</issue>. <pub-id pub-id-type="doi">10.3389/fmicb.2019.00928</pub-id> <pub-id pub-id-type="pmid">31130929</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Childs</surname> <given-names>L. M.</given-names></name> <name><surname>England</surname> <given-names>W. E.</given-names></name> <name><surname>Young</surname> <given-names>M. J.</given-names></name> <name><surname>Weitz</surname> <given-names>J. S.</given-names></name> <name><surname>Whitaker</surname> <given-names>R. J.</given-names></name></person-group> (<year>2014</year>). <article-title>CRISPR-induced distributed immunity in microbial populations.</article-title> <source><italic>PLoS One</italic></source> <volume>9</volume>:<issue>e101710</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0101710</pub-id> <pub-id pub-id-type="pmid">25000306</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Coelho</surname> <given-names>L. P.</given-names></name> <name><surname>Alves</surname> <given-names>R.</given-names></name> <name><surname>del R&#x00ED;o</surname> <given-names>&#x00C1;R.</given-names></name> <name><surname>Myers</surname> <given-names>P. N.</given-names></name> <name><surname>Cantalapiedra</surname> <given-names>C. P.</given-names></name> <name><surname>Giner-Lamia</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2022</year>). <article-title>Towards the biogeography of prokaryotic genes.</article-title> <source><italic>Nature</italic></source> <volume>601</volume> <fpage>252</fpage>&#x2013;<lpage>256</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-021-04233-4</pub-id> <pub-id pub-id-type="pmid">34912116</pub-id></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Crawley</surname> <given-names>A. B.</given-names></name> <name><surname>Henriksen</surname> <given-names>J. R.</given-names></name> <name><surname>Barrangou</surname> <given-names>R.</given-names></name></person-group> (<year>2018</year>). <article-title>CRISPRdisco: An automated pipeline for the discovery and analysis of CRISPR-Cas systems.</article-title> <source><italic>Cris. J.</italic></source> <volume>1</volume> <fpage>171</fpage>&#x2013;<lpage>181</lpage>. <pub-id pub-id-type="doi">10.1089/crispr.2017.0022</pub-id> <pub-id pub-id-type="pmid">31021201</pub-id></citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Drake</surname> <given-names>J. W.</given-names></name></person-group> (<year>2009</year>). <article-title>Avoiding dangerous missense: Thermophiles display especially low mutation rates.</article-title> <source><italic>PLoS Genet.</italic></source> <volume>5</volume>:<issue>e1000520</issue>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1000520</pub-id> <pub-id pub-id-type="pmid">19543367</pub-id></citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Eddy</surname> <given-names>S. R.</given-names></name></person-group> (<year>2011</year>). <article-title>Accelerated profile HMM searches.</article-title> <source><italic>PLoS Comput. Biol.</italic></source> <volume>7</volume>:<issue>e1002195</issue>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1002195</pub-id> <pub-id pub-id-type="pmid">22039361</pub-id></citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fidler</surname> <given-names>D. R.</given-names></name> <name><surname>Murphy</surname> <given-names>S. E.</given-names></name> <name><surname>Courtis</surname> <given-names>K.</given-names></name> <name><surname>Antonoudiou</surname> <given-names>P.</given-names></name> <name><surname>El-Tohamy</surname> <given-names>R.</given-names></name> <name><surname>Ient</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Using HHsearch to tackle proteins of unknown function: A pilot study with PH domains.</article-title> <source><italic>Traffic</italic></source> <volume>17</volume> <fpage>1214</fpage>&#x2013;<lpage>1226</lpage>. <pub-id pub-id-type="doi">10.1111/tra.12432</pub-id> <pub-id pub-id-type="pmid">27601190</pub-id></citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Finsinger</surname> <given-names>K.</given-names></name> <name><surname>Scholz</surname> <given-names>I.</given-names></name> <name><surname>Serrano</surname> <given-names>A.</given-names></name> <name><surname>Morales</surname> <given-names>S.</given-names></name> <name><surname>Uribe-Lorio</surname> <given-names>L.</given-names></name> <name><surname>Mora</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>Characterization of true-branching cyanobacteria from geothermal sites and hot springs of Costa Rica.</article-title> <source><italic>Environ. Microbiol.</italic></source> <volume>10</volume> <fpage>460</fpage>&#x2013;<lpage>473</lpage>. <pub-id pub-id-type="doi">10.1111/j.1462-2920.2007.01467.x</pub-id> <pub-id pub-id-type="pmid">18093164</pub-id></citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fu</surname> <given-names>L.</given-names></name> <name><surname>Niu</surname> <given-names>B.</given-names></name> <name><surname>Zhu</surname> <given-names>Z.</given-names></name> <name><surname>Wu</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name></person-group> (<year>2012</year>). <article-title>CD-HIT: Accelerated for clustering the next-generation sequencing data.</article-title> <source><italic>Bioinformatics</italic></source> <volume>28</volume> <fpage>3150</fpage>&#x2013;<lpage>3152</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bts565</pub-id> <pub-id pub-id-type="pmid">23060610</pub-id></citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Glennon</surname> <given-names>J. A.</given-names></name> <name><surname>Pfaff</surname> <given-names>R. M.</given-names></name></person-group> (<year>2003</year>). <article-title>The extraordinary thermal activity of El Tatio geyser field, Antofagasta region, Chile.</article-title> <source><italic>GOSA Trans.</italic></source> <volume>8</volume> <fpage>31</fpage>&#x2013;<lpage>78</lpage>.</citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gremme</surname> <given-names>G.</given-names></name> <name><surname>Steinbiss</surname> <given-names>S.</given-names></name> <name><surname>Kurtz</surname> <given-names>S.</given-names></name></person-group> (<year>2013</year>). <article-title>GenomeTools: A comprehensive software library for efficient processing of structured genome annotations.</article-title> <source><italic>IEEE/ACM Trans. Comput. Biol. Bioinform.</italic></source> <volume>10</volume> <fpage>645</fpage>&#x2013;<lpage>656</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2013.68</pub-id> <pub-id pub-id-type="pmid">24091398</pub-id></citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guajardo-Leiva</surname> <given-names>S.</given-names></name> <name><surname>Pedr&#x00F3;s-Ali&#x00F3;</surname> <given-names>C.</given-names></name> <name><surname>Salgado</surname> <given-names>O.</given-names></name> <name><surname>Pinto</surname> <given-names>F.</given-names></name> <name><surname>D&#x00ED;ez</surname> <given-names>B.</given-names></name></person-group> (<year>2018</year>). <article-title>Active crossfire between cyanobacteria and cyanophages in phototrophic mat communities within hot springs.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>9</volume>:<issue>2039</issue>. <pub-id pub-id-type="doi">10.3389/fmicb.2018.02039</pub-id> <pub-id pub-id-type="pmid">30233525</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hickman</surname> <given-names>A. B.</given-names></name> <name><surname>Dyda</surname> <given-names>F.</given-names></name></person-group> (<year>2015</year>). <article-title>The casposon-encoded Cas1 protein from <italic>Aciduliprofundum boonei</italic> is a DNA integrase that generates target site duplications.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>43</volume> <fpage>10576</fpage>&#x2013;<lpage>10587</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv1180</pub-id> <pub-id pub-id-type="pmid">26573596</pub-id></citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hille</surname> <given-names>F.</given-names></name> <name><surname>Richter</surname> <given-names>H.</given-names></name> <name><surname>Wong</surname> <given-names>S. P.</given-names></name> <name><surname>Bratovi&#x010D;</surname> <given-names>M.</given-names></name> <name><surname>Ressel</surname> <given-names>S.</given-names></name> <name><surname>Charpentier</surname> <given-names>E.</given-names></name></person-group> (<year>2018</year>). <article-title>The biology of CRISPR-Cas: Backward and forward.</article-title> <source><italic>Cell</italic></source> <volume>172</volume> <fpage>1239</fpage>&#x2013;<lpage>1259</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2017.11.032</pub-id> <pub-id pub-id-type="pmid">29522745</pub-id></citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoang</surname> <given-names>D. T.</given-names></name> <name><surname>Chernomor</surname> <given-names>O.</given-names></name> <name><surname>von Haeseler</surname> <given-names>A.</given-names></name> <name><surname>Minh</surname> <given-names>B. Q.</given-names></name> <name><surname>Vinh</surname> <given-names>L. S.</given-names></name></person-group> (<year>2018</year>). <article-title>UFBoot2: Improving the ultrafast bootstrap approximation.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>35</volume> <fpage>518</fpage>&#x2013;<lpage>522</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msx281</pub-id> <pub-id pub-id-type="pmid">29077904</pub-id></citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hyatt</surname> <given-names>D.</given-names></name> <name><surname>Chen</surname> <given-names>G. L.</given-names></name> <name><surname>LoCascio</surname> <given-names>P. F.</given-names></name> <name><surname>Land</surname> <given-names>M. L.</given-names></name> <name><surname>Larimer</surname> <given-names>F. W.</given-names></name> <name><surname>Hauser</surname> <given-names>L. J.</given-names></name></person-group> (<year>2010</year>). <article-title>Prodigal: Prokaryotic gene recognition and translation initiation site identification.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>11</volume>:<issue>119</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-11-119</pub-id> <pub-id pub-id-type="pmid">20211023</pub-id></citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Inskeep</surname> <given-names>W. P.</given-names></name> <name><surname>Jay</surname> <given-names>Z. J.</given-names></name> <name><surname>Tringe</surname> <given-names>S. G.</given-names></name> <name><surname>Herrg&#x00E5;rd</surname> <given-names>M. J.</given-names></name> <name><surname>Rusch</surname> <given-names>D. B.</given-names></name></person-group> (<year>2013</year>). <article-title>The YNP metagenome project: Environmental parameters responsible for microbial distribution in the Yellowstone geothermal ecosystem.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>4</volume>:<issue>67</issue>. <pub-id pub-id-type="doi">10.3389/fmicb.2013.00067</pub-id> <pub-id pub-id-type="pmid">23653623</pub-id></citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Inskeep</surname> <given-names>W. P.</given-names></name> <name><surname>Rusch</surname> <given-names>D. B.</given-names></name> <name><surname>Jay</surname> <given-names>Z. J.</given-names></name> <name><surname>Herrgard</surname> <given-names>M. J.</given-names></name> <name><surname>Kozubal</surname> <given-names>M. A.</given-names></name> <name><surname>Richardson</surname> <given-names>T. H.</given-names></name><etal/></person-group> (<year>2010</year>). <article-title>Metagenomes from high-temperature chemotrophic systems reveal geochemical controls on microbial community structure and function.</article-title> <source><italic>PLoS One</italic></source> <volume>5</volume>:<issue>e9773</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0009773</pub-id> <pub-id pub-id-type="pmid">20333304</pub-id></citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ionescu</surname> <given-names>D.</given-names></name> <name><surname>Hindiyeh</surname> <given-names>M.</given-names></name> <name><surname>Malkawi</surname> <given-names>H.</given-names></name> <name><surname>Oren</surname> <given-names>A.</given-names></name></person-group> (<year>2010</year>). <article-title>Biogeography of thermophilic cyanobacteria: Insights from the Zerka Ma&#x2019;in hot springs (Jordan).</article-title> <source><italic>FEMS Microbiol. Ecol.</italic></source> <volume>72</volume> <fpage>103</fpage>&#x2013;<lpage>113</lpage>. <pub-id pub-id-type="doi">10.1111/j.1574-6941.2010.00835.x</pub-id> <pub-id pub-id-type="pmid">20180851</pub-id></citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Iranzo</surname> <given-names>J.</given-names></name> <name><surname>Lobkovsky</surname> <given-names>A. E.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2013</year>). <article-title>Evolutionary dynamics of the prokaryotic adaptive immunity system CRISPR-Cas in an explicit ecological context.</article-title> <source><italic>J. Bacteriol.</italic></source> <volume>195</volume> <fpage>3834</fpage>&#x2013;<lpage>3844</lpage>. <pub-id pub-id-type="doi">10.1128/JB.00412-13</pub-id> <pub-id pub-id-type="pmid">23794616</pub-id></citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jackson</surname> <given-names>S. A.</given-names></name> <name><surname>McKenzie</surname> <given-names>R. E.</given-names></name> <name><surname>Fagerlund</surname> <given-names>R. D.</given-names></name> <name><surname>Kieper</surname> <given-names>S. N.</given-names></name> <name><surname>Fineran</surname> <given-names>P. C.</given-names></name> <name><surname>Brouns</surname> <given-names>S. J. J.</given-names></name></person-group> (<year>2017</year>). <article-title>CRISPR-Cas: Adapting to change.</article-title> <source><italic>Science</italic></source> <volume>356</volume>:<issue>eaal5056</issue>. <pub-id pub-id-type="doi">10.1126/science.aal5056</pub-id> <pub-id pub-id-type="pmid">28385959</pub-id></citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jung</surname> <given-names>T. Y.</given-names></name> <name><surname>Park</surname> <given-names>K. H.</given-names></name> <name><surname>An</surname> <given-names>Y.</given-names></name> <name><surname>Schulga</surname> <given-names>A.</given-names></name> <name><surname>Deyev</surname> <given-names>S.</given-names></name> <name><surname>Jung</surname> <given-names>J. H.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>Structural features of Cas2 from <italic>Thermococcus onnurineus</italic> in CRISPR-cas system type IV.</article-title> <source><italic>Protein Sci.</italic></source> <volume>25</volume> <fpage>1890</fpage>&#x2013;<lpage>1897</lpage>. <pub-id pub-id-type="doi">10.1002/pro.2981</pub-id> <pub-id pub-id-type="pmid">27400737</pub-id></citation></ref>
<ref id="B39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Klatt</surname> <given-names>C. G.</given-names></name> <name><surname>Inskeep</surname> <given-names>W. P.</given-names></name> <name><surname>Herrgard</surname> <given-names>M. J.</given-names></name> <name><surname>Jay</surname> <given-names>Z. J.</given-names></name> <name><surname>Rusch</surname> <given-names>D. B.</given-names></name> <name><surname>Tringe</surname> <given-names>S. G.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>Community structure and function of high-temperature chlorophototrophic microbial mats inhabiting diverse geothermal environments.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>4</volume>:<issue>106</issue>. <pub-id pub-id-type="doi">10.3389/fmicb.2013.00106</pub-id> <pub-id pub-id-type="pmid">23761787</pub-id></citation></ref>
<ref id="B40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Klatt</surname> <given-names>C. G.</given-names></name> <name><surname>Wood</surname> <given-names>J. M.</given-names></name> <name><surname>Rusch</surname> <given-names>D. B.</given-names></name> <name><surname>Bateson</surname> <given-names>M. M.</given-names></name> <name><surname>Hamamura</surname> <given-names>N.</given-names></name> <name><surname>Heidelberg</surname> <given-names>J. F.</given-names></name><etal/></person-group> (<year>2011</year>). <article-title>Community ecology of hot spring cyanobacterial mats: Predominant populations and their functional potential.</article-title> <source><italic>ISME J.</italic></source> <volume>5</volume> <fpage>1262</fpage>&#x2013;<lpage>1278</lpage>. <pub-id pub-id-type="doi">10.1038/ismej.2011.73</pub-id> <pub-id pub-id-type="pmid">21697961</pub-id></citation></ref>
<ref id="B41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Koonin</surname> <given-names>E. V.</given-names></name> <name><surname>Krupovic</surname> <given-names>M.</given-names></name></person-group> (<year>2015</year>). <article-title>Evolution of adaptive immunity from transposable elements combined with innate immune systems.</article-title> <source><italic>Nat. Rev. Genet.</italic></source> <volume>16</volume> <fpage>184</fpage>&#x2013;<lpage>192</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3859</pub-id> <pub-id pub-id-type="pmid">25488578</pub-id></citation></ref>
<ref id="B42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Koonin</surname> <given-names>E. V.</given-names></name> <name><surname>Makarova</surname> <given-names>K. S.</given-names></name></person-group> (<year>2019</year>). <article-title>Origins and evolution of CRISPR-Cas systems.</article-title> <source><italic>Philos. Trans. R. Soc. Lond. B Biol. Sci.</italic></source> <volume>374</volume>:<issue>20180087</issue>. <pub-id pub-id-type="doi">10.1098/rstb.2018.0087</pub-id> <pub-id pub-id-type="pmid">30905284</pub-id></citation></ref>
<ref id="B43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Koonin</surname> <given-names>E. V.</given-names></name> <name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Zhang</surname> <given-names>F.</given-names></name></person-group> (<year>2017</year>). <article-title>Diversity, classification and evolution of CRISPR-Cas systems.</article-title> <source><italic>Curr. Opin. Microbiol.</italic></source> <volume>37</volume> <fpage>67</fpage>&#x2013;<lpage>78</lpage>. <pub-id pub-id-type="doi">10.1016/j.mib.2017.05.008</pub-id> <pub-id pub-id-type="pmid">28605718</pub-id></citation></ref>
<ref id="B44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Koonin</surname> <given-names>E. V.</given-names></name> <name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Krupovic</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>Evolutionary entanglement of mobile genetic elements and host defence systems: Guns for hire.</article-title> <source><italic>Nat. Rev. Genet.</italic></source> <volume>21</volume> <fpage>119</fpage>&#x2013;<lpage>131</lpage>. <pub-id pub-id-type="doi">10.1038/s41576-019-0172-9</pub-id> <pub-id pub-id-type="pmid">31611667</pub-id></citation></ref>
<ref id="B45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krishnan</surname> <given-names>A.</given-names></name> <name><surname>Burroughs</surname> <given-names>A. M.</given-names></name> <name><surname>Iyer</surname> <given-names>L. M.</given-names></name> <name><surname>Aravind</surname> <given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>Comprehensive classification of ABC ATPases and their functional radiation in nucleoprotein dynamics and biological conflict systems.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>48</volume> <fpage>10045</fpage>&#x2013;<lpage>10075</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa726</pub-id> <pub-id pub-id-type="pmid">32894288</pub-id></citation></ref>
<ref id="B46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krupovic</surname> <given-names>M.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2016</year>). <article-title>Self-synthesizing transposons: Unexpected key players in the evolution of viruses and defense systems.</article-title> <source><italic>Curr. Opin. Microbiol.</italic></source> <volume>31</volume> <fpage>25</fpage>&#x2013;<lpage>33</lpage>. <pub-id pub-id-type="doi">10.1016/j.mib.2016.01.006</pub-id> <pub-id pub-id-type="pmid">26836982</pub-id></citation></ref>
<ref id="B47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krupovic</surname> <given-names>M.</given-names></name> <name><surname>B&#x00E9;guin</surname> <given-names>P.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2017</year>). <article-title>Casposons: Mobile genetic elements that gave rise to the CRISPR-Cas adaptation machinery.</article-title> <source><italic>Curr. Opin. Microbiol.</italic></source> <volume>38</volume> <fpage>36</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1016/j.mib.2017.04.004</pub-id> <pub-id pub-id-type="pmid">28472712</pub-id></citation></ref>
<ref id="B48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krupovic</surname> <given-names>M.</given-names></name> <name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Forterre</surname> <given-names>P.</given-names></name> <name><surname>Prangishvili</surname> <given-names>D.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2014</year>). <article-title>Casposons: A new superfamily of self-synthesizing DNA transposons at the origin of prokaryotic CRISPR-Cas immunity.</article-title> <source><italic>BMC Biol.</italic></source> <volume>12</volume>:<issue>36</issue>. <pub-id pub-id-type="doi">10.1186/1741-7007-12-36</pub-id> <pub-id pub-id-type="pmid">24884953</pub-id></citation></ref>
<ref id="B49"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krupovic</surname> <given-names>M.</given-names></name> <name><surname>Shmakov</surname> <given-names>S.</given-names></name> <name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Forterre</surname> <given-names>P.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2016</year>). <article-title>Recent mobility of casposons, self-synthesizing transposons at the origin of the CRISPR-Cas immunity.</article-title> <source><italic>Genome Biol. Evol.</italic></source> <volume>8</volume> <fpage>375</fpage>&#x2013;<lpage>386</lpage>. <pub-id pub-id-type="doi">10.1093/gbe/evw006</pub-id> <pub-id pub-id-type="pmid">26764427</pub-id></citation></ref>
<ref id="B50"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kunin</surname> <given-names>V.</given-names></name> <name><surname>He</surname> <given-names>S.</given-names></name> <name><surname>Warnecke</surname> <given-names>F.</given-names></name> <name><surname>Peterson</surname> <given-names>S. B.</given-names></name> <name><surname>Garcia Martin</surname> <given-names>H.</given-names></name> <name><surname>Haynes</surname> <given-names>M.</given-names></name><etal/></person-group> (<year>2008</year>). <article-title>A bacterial metapopulation adapts locally to phage predation despite global dispersal.</article-title> <source><italic>Genome Res.</italic></source> <volume>18</volume> <fpage>293</fpage>&#x2013;<lpage>297</lpage>. <pub-id pub-id-type="doi">10.1101/gr.6835308</pub-id> <pub-id pub-id-type="pmid">18077539</pub-id></citation></ref>
<ref id="B51"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Landsberger</surname> <given-names>M.</given-names></name> <name><surname>Gandon</surname> <given-names>S.</given-names></name> <name><surname>Meaden</surname> <given-names>S.</given-names></name> <name><surname>Rollie</surname> <given-names>C.</given-names></name> <name><surname>Chevallereau</surname> <given-names>A.</given-names></name> <name><surname>Chabas</surname> <given-names>H.</given-names></name><etal/></person-group> (<year>2018</year>). <article-title>Anti-CRISPR phages cooperate to overcome CRISPR-Cas immunity.</article-title> <source><italic>Cell</italic></source> <volume>174</volume> <fpage>908</fpage>&#x2013;<lpage>916.e12</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2018.05.058</pub-id> <pub-id pub-id-type="pmid">30033365</pub-id></citation></ref>
<ref id="B52"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Langmead</surname> <given-names>B.</given-names></name> <name><surname>Salzberg</surname> <given-names>S. L.</given-names></name></person-group> (<year>2012</year>). <article-title>Fast gapped-read alignment with Bowtie 2.</article-title> <source><italic>Nat. Methods</italic></source> <volume>9</volume> <fpage>357</fpage>&#x2013;<lpage>359</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id> <pub-id pub-id-type="pmid">22388286</pub-id></citation></ref>
<ref id="B53"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Le</surname> <given-names>S. Q.</given-names></name> <name><surname>Gascuel</surname> <given-names>O.</given-names></name></person-group> (<year>2008</year>). <article-title>An improved general amino acid replacement matrix.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>25</volume> <fpage>1307</fpage>&#x2013;<lpage>1320</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msn067</pub-id> <pub-id pub-id-type="pmid">18367465</pub-id></citation></ref>
<ref id="B54"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Letunic</surname> <given-names>I.</given-names></name> <name><surname>Bork</surname> <given-names>P.</given-names></name></person-group> (<year>2019</year>). <article-title>Interactive Tree of Life (iTOL) v4: Recent updates and new developments.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>47</volume> <fpage>W256</fpage>&#x2013;<lpage>W259</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz239</pub-id> <pub-id pub-id-type="pmid">30931475</pub-id></citation></ref>
<ref id="B55"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>D.</given-names></name> <name><surname>Liu</surname> <given-names>C. M.</given-names></name> <name><surname>Luo</surname> <given-names>R.</given-names></name> <name><surname>Sadakane</surname> <given-names>K.</given-names></name> <name><surname>Lam</surname> <given-names>T. W.</given-names></name></person-group> (<year>2015</year>). <article-title>MEGAHIT: An ultra-fast single-node solution for large and complex metagenomics assembly via succinct de Bruijn graph.</article-title> <source><italic>Bioinformatics</italic></source> <volume>31</volume> <fpage>1674</fpage>&#x2013;<lpage>1676</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv033</pub-id> <pub-id pub-id-type="pmid">25609793</pub-id></citation></ref>
<ref id="B56"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>S. J.</given-names></name> <name><surname>Hua</surname> <given-names>Z. S.</given-names></name> <name><surname>Huang</surname> <given-names>L. N.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Shi</surname> <given-names>S. H.</given-names></name> <name><surname>Chen</surname> <given-names>L. X.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Microbial communities evolve faster in extreme environments.</article-title> <source><italic>Sci. Rep.</italic></source> <volume>4</volume>:<issue>6205</issue>. <pub-id pub-id-type="doi">10.1038/srep06205</pub-id> <pub-id pub-id-type="pmid">25158668</pub-id></citation></ref>
<ref id="B57"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>L&#x00F3;pez-L&#x00F3;pez</surname> <given-names>O.</given-names></name> <name><surname>Cerd&#x00E1;n</surname> <given-names>M. E.</given-names></name> <name><surname>Gonz&#x00E1;lez-Siso</surname> <given-names>M. I.</given-names></name></person-group> (<year>2013</year>). <article-title>Hot spring metagenomics.</article-title> <source><italic>Life</italic></source> <volume>3</volume> <fpage>308</fpage>&#x2013;<lpage>320</lpage>. <pub-id pub-id-type="doi">10.3390/life3020308</pub-id> <pub-id pub-id-type="pmid">25369743</pub-id></citation></ref>
<ref id="B58"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Madeira</surname> <given-names>F.</given-names></name> <name><surname>Park</surname> <given-names>Y. M.</given-names></name> <name><surname>Lee</surname> <given-names>J.</given-names></name> <name><surname>Buso</surname> <given-names>N.</given-names></name> <name><surname>Gur</surname> <given-names>T.</given-names></name> <name><surname>Madhusoodanan</surname> <given-names>N.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>The EMBL-EBI search and sequence analysis tools APIs in 2019.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>47</volume> <fpage>W636</fpage>&#x2013;<lpage>W641</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz268</pub-id> <pub-id pub-id-type="pmid">30976793</pub-id></citation></ref>
<ref id="B59"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Haft</surname> <given-names>D. H.</given-names></name> <name><surname>Barrangou</surname> <given-names>R.</given-names></name> <name><surname>Brouns</surname> <given-names>S. J. J.</given-names></name> <name><surname>Charpentier</surname> <given-names>E.</given-names></name> <name><surname>Horvath</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2011</year>). <article-title>Evolution and classification of the CRISPR&#x2013;Cas systems.</article-title> <source><italic>Nat. Rev. Microbiol.</italic></source> <volume>9</volume> <fpage>467</fpage>&#x2013;<lpage>477</lpage>. <pub-id pub-id-type="doi">10.1038/nrmicro2577</pub-id> <pub-id pub-id-type="pmid">21552286</pub-id></citation></ref>
<ref id="B60"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Alkhnbashi</surname> <given-names>O. S.</given-names></name> <name><surname>Costa</surname> <given-names>F.</given-names></name> <name><surname>Shah</surname> <given-names>S. A.</given-names></name> <name><surname>Saunders</surname> <given-names>S. J.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>An updated evolutionary classification of CRISPR&#x2013;Cas systems.</article-title> <source><italic>Nat. Rev. Microbiol.</italic></source> <volume>13</volume> <fpage>722</fpage>&#x2013;<lpage>736</lpage>. <pub-id pub-id-type="doi">10.1038/nrmicro3569</pub-id> <pub-id pub-id-type="pmid">26411297</pub-id></citation></ref>
<ref id="B61"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2013</year>). <article-title>The basic building blocks and evolution of CRISPR-Cas systems.</article-title> <source><italic>Biochem. Soc. Trans.</italic></source> <volume>41</volume> <fpage>1392</fpage>&#x2013;<lpage>1400</lpage>. <pub-id pub-id-type="doi">10.1042/BST20130038</pub-id> <pub-id pub-id-type="pmid">24256226</pub-id></citation></ref>
<ref id="B62"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Iranzo</surname> <given-names>J.</given-names></name> <name><surname>Shmakov</surname> <given-names>S. A.</given-names></name> <name><surname>Alkhnbashi</surname> <given-names>O. S.</given-names></name> <name><surname>Brouns</surname> <given-names>S. J. J.</given-names></name><etal/></person-group> (<year>2020a</year>). <article-title>Evolutionary classification of CRISPR&#x2013;Cas systems: A burst of class 2 and derived variants.</article-title> <source><italic>Nat. Rev. Microbiol.</italic></source> <volume>18</volume> <fpage>67</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1038/s41579-019-0299-x</pub-id> <pub-id pub-id-type="pmid">31857715</pub-id></citation></ref>
<ref id="B63"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Shmakov</surname> <given-names>S. A.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2020b</year>). <article-title>Unprecedented diversity of unique CRISPR-Cas-related systems and Cas1 homologs in Asgard archaea.</article-title> <source><italic>CRISPR J.</italic></source> <volume>3</volume> <fpage>156</fpage>&#x2013;<lpage>163</lpage>. <pub-id pub-id-type="doi">10.1089/crispr.2020.0012</pub-id> <pub-id pub-id-type="pmid">33555973</pub-id></citation></ref>
<ref id="B64"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marchler-Bauer</surname> <given-names>A.</given-names></name> <name><surname>Bryant</surname> <given-names>S. H.</given-names></name></person-group> (<year>2004</year>). <article-title>CD-Search: Protein domain annotations on the fly.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>32</volume> <fpage>W327</fpage>&#x2013;<lpage>W331</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkh454</pub-id> <pub-id pub-id-type="pmid">15215404</pub-id></citation></ref>
<ref id="B65"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marchler-Bauer</surname> <given-names>A.</given-names></name> <name><surname>Bo</surname> <given-names>Y.</given-names></name> <name><surname>Han</surname> <given-names>L.</given-names></name> <name><surname>He</surname> <given-names>J.</given-names></name> <name><surname>Lanczycki</surname> <given-names>C. J.</given-names></name> <name><surname>Lu</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>CDD/SPARCLE: Functional classification of proteins via subfamily domain architectures.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>45</volume> <fpage>D200</fpage>&#x2013;<lpage>D203</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkw1129</pub-id> <pub-id pub-id-type="pmid">27899674</pub-id></citation></ref>
<ref id="B66"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marchler-Bauer</surname> <given-names>A.</given-names></name> <name><surname>Panchenko</surname> <given-names>A. R.</given-names></name> <name><surname>Shoemaker</surname> <given-names>B. A.</given-names></name> <name><surname>Thiessen</surname> <given-names>P. A.</given-names></name> <name><surname>Geer</surname> <given-names>L. Y.</given-names></name> <name><surname>Bryant</surname> <given-names>S. H.</given-names></name></person-group> (<year>2002</year>). <article-title>CDD: A database of conserved domain alignments with links to domain three-dimensional structure.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>30</volume> <fpage>281</fpage>&#x2013;<lpage>283</lpage>. <pub-id pub-id-type="doi">10.1093/nar/30.1.281</pub-id> <pub-id pub-id-type="pmid">11752315</pub-id></citation></ref>
<ref id="B67"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Massello</surname> <given-names>F. L.</given-names></name> <name><surname>Chan</surname> <given-names>C. S.</given-names></name> <name><surname>Chan</surname> <given-names>K. G.</given-names></name> <name><surname>Goh</surname> <given-names>K. M.</given-names></name> <name><surname>Donati</surname> <given-names>E.</given-names></name> <name><surname>Urbieta</surname> <given-names>M. S.</given-names></name></person-group> (<year>2020</year>). <article-title>Meta-analysis of microbial communities in hot springs: Recurrent taxa and complex shaping factors beyond pH and temperature.</article-title> <source><italic>Microorganisms</italic></source> <volume>8</volume>:<issue>906</issue>. <pub-id pub-id-type="doi">10.3390/microorganisms8060906</pub-id> <pub-id pub-id-type="pmid">32560103</pub-id></citation></ref>
<ref id="B68"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>McGinn</surname> <given-names>J.</given-names></name> <name><surname>Marraffini</surname> <given-names>L. A.</given-names></name></person-group> (<year>2019</year>). <article-title>Molecular mechanisms of CRISPR&#x2013;Cas spacer acquisition.</article-title> <source><italic>Nat. Rev. Microbiol.</italic></source> <volume>17</volume> <fpage>7</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1038/s41579-018-0071-7</pub-id> <pub-id pub-id-type="pmid">30171202</pub-id></citation></ref>
<ref id="B69"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meaden</surname> <given-names>S.</given-names></name> <name><surname>Capria</surname> <given-names>L.</given-names></name> <name><surname>Alseth</surname> <given-names>E.</given-names></name> <name><surname>Gandon</surname> <given-names>S.</given-names></name> <name><surname>Biswas</surname> <given-names>A.</given-names></name> <name><surname>Lenzi</surname> <given-names>L.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Phage gene expression and host responses lead to infection-dependent costs of CRISPR immunity.</article-title> <source><italic>ISME J.</italic></source> <volume>15</volume> <fpage>534</fpage>&#x2013;<lpage>544</lpage>. <pub-id pub-id-type="doi">10.1038/s41396-020-00794-w</pub-id> <pub-id pub-id-type="pmid">33011743</pub-id></citation></ref>
<ref id="B70"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Merino</surname> <given-names>N.</given-names></name> <name><surname>Aronson</surname> <given-names>H. S.</given-names></name> <name><surname>Bojanova</surname> <given-names>D. P.</given-names></name> <name><surname>Feyhl-Buska</surname> <given-names>J.</given-names></name> <name><surname>Wong</surname> <given-names>M. L.</given-names></name> <name><surname>Zhang</surname> <given-names>S.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>Living at the extremes: Extremophiles and the limits of life in a planetary context.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>10</volume>:<issue>780</issue>. <pub-id pub-id-type="doi">10.3389/fmicb.2019.00780</pub-id> <pub-id pub-id-type="pmid">31037068</pub-id></citation></ref>
<ref id="B71"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meyer-Dombard</surname> <given-names>D. R.</given-names></name> <name><surname>Shock</surname> <given-names>E. L.</given-names></name> <name><surname>Amend</surname> <given-names>J. P.</given-names></name></person-group> (<year>2005</year>). <article-title>Archaeal and bacterial communities in geochemically diverse hot springs of Yellowstone National Park, USA.</article-title> <source><italic>Geobiology</italic></source> <volume>3</volume> <fpage>211</fpage>&#x2013;<lpage>227</lpage>. <pub-id pub-id-type="doi">10.1111/j.1472-4669.2005.00052.x</pub-id></citation></ref>
<ref id="B72"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mohanraju</surname> <given-names>P.</given-names></name> <name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Zetsche</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>F.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name> <name><surname>van der Oost</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>Diverse evolutionary roots and mechanistic variations of the CRISPR-Cas systems.</article-title> <source><italic>Science</italic></source> <volume>353</volume>:<issue>aad5147</issue>. <pub-id pub-id-type="doi">10.1126/science.aad5147</pub-id> <pub-id pub-id-type="pmid">27493190</pub-id></citation></ref>
<ref id="B73"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mohanraju</surname> <given-names>P.</given-names></name> <name><surname>Saha</surname> <given-names>C.</given-names></name> <name><surname>van Baarlen</surname> <given-names>P.</given-names></name> <name><surname>Louwen</surname> <given-names>R.</given-names></name> <name><surname>Staals</surname> <given-names>R. H. J.</given-names></name> <name><surname>van der Oost</surname> <given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>Alternative functions of CRISPR&#x2013;Cas systems in the evolutionary arms race.</article-title> <source><italic>Nat. Rev. Microbiol.</italic></source> <volume>20</volume> <fpage>351</fpage>&#x2013;<lpage>364</lpage>. <pub-id pub-id-type="doi">10.1038/s41579-021-00663-z</pub-id> <pub-id pub-id-type="pmid">34992260</pub-id></citation></ref>
<ref id="B74"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moya-Beltr&#x00E1;n</surname> <given-names>A.</given-names></name> <name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Acu&#x00F1;a</surname> <given-names>L. G.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Covarrubias</surname> <given-names>P. C.</given-names></name> <name><surname>Shmakov</surname> <given-names>S. A.</given-names></name><etal/></person-group> (<year>2021</year>). <article-title>Evolution of Type IV CRISPR-Cas systems: Insights from CRISPR loci in integrative conjugative elements of Acidithiobacillia.</article-title> <source><italic>CRISPR J.</italic></source> <volume>4</volume> <fpage>656</fpage>&#x2013;<lpage>672</lpage>. <pub-id pub-id-type="doi">10.1089/crispr.2021.0051</pub-id> <pub-id pub-id-type="pmid">34582696</pub-id></citation></ref>
<ref id="B75"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moya-Beltr&#x00E1;n</surname> <given-names>A.</given-names></name> <name><surname>Rojas-Villalobos</surname> <given-names>C.</given-names></name> <name><surname>D&#x00ED;az</surname> <given-names>M.</given-names></name> <name><surname>Guiliani</surname> <given-names>N.</given-names></name> <name><surname>Quatrini</surname> <given-names>R.</given-names></name> <name><surname>Castro</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Nucleotide second messenger-based signaling in extreme acidophiles of the Acidithiobacillus species complex: Partition between the core and variable gene complements.</article-title> <source><italic>Front. Microbiol.</italic></source> <volume>10</volume>:<issue>381</issue>. <pub-id pub-id-type="doi">10.3389/fmicb.2019.00381</pub-id> <pub-id pub-id-type="pmid">30899248</pub-id></citation></ref>
<ref id="B76"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>L.-T.</given-names></name> <name><surname>Schmidt</surname> <given-names>H. A.</given-names></name> <name><surname>von Haeseler</surname> <given-names>A.</given-names></name> <name><surname>Minh</surname> <given-names>B. Q.</given-names></name></person-group> (<year>2015</year>). <article-title>IQ-TREE: A fast and effective stochastic algorithm for estimating maximum-likelihood phylogenies.</article-title> <source><italic>Mol. Biol. Evol.</italic></source> <volume>32</volume> <fpage>268</fpage>&#x2013;<lpage>274</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msu300</pub-id> <pub-id pub-id-type="pmid">25371430</pub-id></citation></ref>
<ref id="B77"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oksanen</surname> <given-names>J.</given-names></name> <name><surname>Blanchet</surname> <given-names>F. G.</given-names></name> <name><surname>Kindt</surname> <given-names>R.</given-names></name> <name><surname>Legendre</surname> <given-names>P.</given-names></name> <name><surname>Minchin</surname> <given-names>P. R.</given-names></name> <name><surname>O&#x2019;Hara</surname> <given-names>R. B.</given-names></name><etal/></person-group> (<year>2020</year>). <source><italic>Vegan community ecology package: Ordination methods, diversity analysis and other functions for community and vegetation ecologists. R Packag. version 2.5-7.</italic></source> <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="http://apps.worldagroforestry.org/publication/vegan-community-ecology-package-ordination-methods-diversity-analysis-and-other">http://apps.worldagroforestry.org/publication/vegan-community-ecology-package-ordination-methods-diversity-analysis-and-other</ext-link> <comment>(accessed May 21, 2021)</comment>.</citation></ref>
<ref id="B78"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Parmar</surname> <given-names>K.</given-names></name> <name><surname>Dafale</surname> <given-names>N.</given-names></name> <name><surname>Pal</surname> <given-names>R.</given-names></name> <name><surname>Tikariha</surname> <given-names>H.</given-names></name> <name><surname>Purohit</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>An insight into phage diversity at environmental habitats using comparative metagenomics approach.</article-title> <source><italic>Curr. Microbiol.</italic></source> <volume>75</volume> <fpage>132</fpage>&#x2013;<lpage>141</lpage>. <pub-id pub-id-type="doi">10.1007/s00284-017-1357-0</pub-id> <pub-id pub-id-type="pmid">28929212</pub-id></citation></ref>
<ref id="B79"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pericard</surname> <given-names>P.</given-names></name> <name><surname>Dufresne</surname> <given-names>Y.</given-names></name> <name><surname>Couderc</surname> <given-names>L.</given-names></name> <name><surname>Blanquart</surname> <given-names>S.</given-names></name> <name><surname>Touzet</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>MATAM: Reconstruction of phylogenetic marker genes from short sequencing reads in metagenomes.</article-title> <source><italic>Bioinformatics</italic></source> <volume>34</volume> <fpage>585</fpage>&#x2013;<lpage>591</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx644</pub-id> <pub-id pub-id-type="pmid">29040406</pub-id></citation></ref>
<ref id="B80"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pinilla-Redondo</surname> <given-names>R.</given-names></name> <name><surname>Mayo-Mu&#x00F1;oz</surname> <given-names>D.</given-names></name> <name><surname>Russel</surname> <given-names>J.</given-names></name> <name><surname>Garrett</surname> <given-names>R. A.</given-names></name> <name><surname>Randau</surname> <given-names>L.</given-names></name> <name><surname>S&#x00F8;rensen</surname> <given-names>S. J.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Type IV CRISPR&#x2013;Cas systems are highly diverse and involved in competition between plasmids.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>48</volume> <fpage>2000</fpage>&#x2013;<lpage>2012</lpage>. <pub-id pub-id-type="doi">10.1093/NAR/GKZ1197</pub-id> <pub-id pub-id-type="pmid">31879772</pub-id></citation></ref>
<ref id="B81"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Quast</surname> <given-names>C.</given-names></name> <name><surname>Pruesse</surname> <given-names>E.</given-names></name> <name><surname>Yilmaz</surname> <given-names>P.</given-names></name> <name><surname>Gerken</surname> <given-names>J.</given-names></name> <name><surname>Schweer</surname> <given-names>T.</given-names></name> <name><surname>Yarza</surname> <given-names>P.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>The SILVA ribosomal RNA gene database project: Improved data processing and web-based tools.</article-title> <source><italic>Nucleic Acids Res.</italic></source> <volume>41</volume> <fpage>D590</fpage>&#x2013;<lpage>D596</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gks1219</pub-id> <pub-id pub-id-type="pmid">23193283</pub-id></citation></ref>
<ref id="B82"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Russel</surname> <given-names>J.</given-names></name> <name><surname>Pinilla-Redondo</surname> <given-names>R.</given-names></name> <name><surname>Mayo-Mu&#x00F1;oz</surname> <given-names>D.</given-names></name> <name><surname>Shah</surname> <given-names>S. A.</given-names></name> <name><surname>S&#x00F8;rensen</surname> <given-names>S. J.</given-names></name></person-group> (<year>2020</year>). <article-title>CRISPRCasTyper: Automated identification, annotation, and classification of CRISPR-Cas Loci.</article-title> <source><italic>CRISPR J.</italic></source> <volume>3</volume> <fpage>462</fpage>&#x2013;<lpage>469</lpage>. <pub-id pub-id-type="doi">10.1089/crispr.2020.0059</pub-id> <pub-id pub-id-type="pmid">33275853</pub-id></citation></ref>
<ref id="B83"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sampson</surname> <given-names>T. R.</given-names></name> <name><surname>Weiss</surname> <given-names>D. S.</given-names></name></person-group> (<year>2013</year>). <article-title>Alternative Roles for CRISPR/Cas Systems in Bacterial Pathogenesis.</article-title> <source><italic>PLoS Pathog.</italic></source> <volume>9</volume>:<issue>e1003621</issue>. <pub-id pub-id-type="doi">10.1371/JOURNAL.PPAT.1003621</pub-id> <pub-id pub-id-type="pmid">24146613</pub-id></citation></ref>
<ref id="B84"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shannon</surname> <given-names>P.</given-names></name> <name><surname>Markiel</surname> <given-names>A.</given-names></name> <name><surname>Ozier</surname> <given-names>O.</given-names></name> <name><surname>Baliga</surname> <given-names>N. S.</given-names></name> <name><surname>Wang</surname> <given-names>J. T.</given-names></name> <name><surname>Ramage</surname> <given-names>D.</given-names></name><etal/></person-group> (<year>2003</year>). <article-title>Cytoscape: A software environment for integrated models of biomolecular interaction networks.</article-title> <source><italic>Genome Res.</italic></source> <volume>13</volume> <fpage>2498</fpage>&#x2013;<lpage>2504</lpage>. <pub-id pub-id-type="doi">10.1101/gr.1239303</pub-id> <pub-id pub-id-type="pmid">14597658</pub-id></citation></ref>
<ref id="B85"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sharp</surname> <given-names>C. E.</given-names></name> <name><surname>Brady</surname> <given-names>A. L.</given-names></name> <name><surname>Sharp</surname> <given-names>G. H.</given-names></name> <name><surname>Grasby</surname> <given-names>S. E.</given-names></name> <name><surname>Stott</surname> <given-names>M. B.</given-names></name> <name><surname>Dunfield</surname> <given-names>P. F.</given-names></name></person-group> (<year>2014</year>). <article-title>Humboldt&#x2019;s spa: Microbial diversity is controlled by temperature in geothermal environments.</article-title> <source><italic>ISME J.</italic></source> <volume>8</volume> <fpage>1166</fpage>&#x2013;<lpage>1174</lpage>. <pub-id pub-id-type="doi">10.1038/ismej.2013.237</pub-id> <pub-id pub-id-type="pmid">24430481</pub-id></citation></ref>
<ref id="B86"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shmakov</surname> <given-names>S. A.</given-names></name> <name><surname>Sitnik</surname> <given-names>V.</given-names></name> <name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Severinov</surname> <given-names>K. V.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2017a</year>). <article-title>The CRISPR spacer space is dominated by sequences from species-specific mobilomes.</article-title> <source><italic>mBio</italic></source> <volume>8</volume>:<issue>e01397-17</issue>. <pub-id pub-id-type="doi">10.1128/mBio.01397-17</pub-id> <pub-id pub-id-type="pmid">28928211</pub-id></citation></ref>
<ref id="B87"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shmakov</surname> <given-names>S.</given-names></name> <name><surname>Smargon</surname> <given-names>A.</given-names></name> <name><surname>Scott</surname> <given-names>D.</given-names></name> <name><surname>Cox</surname> <given-names>D.</given-names></name> <name><surname>Pyzocha</surname> <given-names>N.</given-names></name> <name><surname>Yan</surname> <given-names>W.</given-names></name><etal/></person-group> (<year>2017b</year>). <article-title>Diversity and evolution of class 2 CRISPR-Cas systems.</article-title> <source><italic>Nat. Rev. Microbiol.</italic></source> <volume>15</volume> <fpage>169</fpage>&#x2013;<lpage>182</lpage>. <pub-id pub-id-type="doi">10.1038/nrmicro.2016.184</pub-id> <pub-id pub-id-type="pmid">28111461</pub-id></citation></ref>
<ref id="B88"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Silas</surname> <given-names>S.</given-names></name> <name><surname>Makarova</surname> <given-names>K. S.</given-names></name> <name><surname>Shmakov</surname> <given-names>S.</given-names></name> <name><surname>P&#x00E1;ez-Espino</surname> <given-names>D.</given-names></name> <name><surname>Mohr</surname> <given-names>G.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name><etal/></person-group> (<year>2017</year>). <article-title>On the origin of reverse transcriptase-using CRISPR-Cas systems and their hyperdiverse, enigmatic spacer repertoires.</article-title> <source><italic>mBio</italic></source> <volume>8</volume>:<issue>e00897-17</issue>.</citation></ref>
<ref id="B89"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Steinegger</surname> <given-names>M.</given-names></name> <name><surname>S&#x00F6;ding</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets.</article-title> <source><italic>Nat. Biotechnol.</italic></source> <volume>35</volume> <fpage>1026</fpage>&#x2013;<lpage>1028</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.3988</pub-id> <pub-id pub-id-type="pmid">29035372</pub-id></citation></ref>
<ref id="B90"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Strazzulli</surname> <given-names>A.</given-names></name> <name><surname>Fusco</surname> <given-names>S.</given-names></name> <name><surname>Cobucci-Ponzano</surname> <given-names>B.</given-names></name> <name><surname>Moracci</surname> <given-names>M.</given-names></name> <name><surname>Contursi</surname> <given-names>P.</given-names></name></person-group> (<year>2017</year>). <article-title>Metagenomics of microbial and viral life in terrestrial geothermal environments.</article-title> <source><italic>Rev. Environ. Sci. Biotechnol.</italic></source> <volume>16</volume> <fpage>425</fpage>&#x2013;<lpage>454</lpage>. <pub-id pub-id-type="doi">10.1007/s11157-017-9435-0</pub-id></citation></ref>
<ref id="B91"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sullivan</surname> <given-names>M. J.</given-names></name> <name><surname>Petty</surname> <given-names>N. K.</given-names></name> <name><surname>Beatson</surname> <given-names>S. A.</given-names></name></person-group> (<year>2011</year>). <article-title>Easyfig: A genome comparison visualizer.</article-title> <source><italic>Bioinformatics</italic></source> <volume>27</volume> <fpage>1009</fpage>&#x2013;<lpage>1010</lpage>. <pub-id pub-id-type="doi">10.1093/BIOINFORMATICS/BTR039</pub-id> <pub-id pub-id-type="pmid">21278367</pub-id></citation></ref>
<ref id="B92"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Taylor</surname> <given-names>H. N.</given-names></name> <name><surname>Warner</surname> <given-names>E. E.</given-names></name> <name><surname>Armbrust</surname> <given-names>M. J.</given-names></name> <name><surname>Crowley</surname> <given-names>V. M.</given-names></name> <name><surname>Olsen</surname> <given-names>K. J.</given-names></name> <name><surname>Jackson</surname> <given-names>R. N.</given-names></name></person-group> (<year>2019</year>). <article-title>Structural basis of Type IV CRISPR RNA biogenesis by a Cas6 endoribonuclease.</article-title> <source><italic>RNA Biol.</italic></source> <volume>16</volume> <fpage>1438</fpage>&#x2013;<lpage>1447</lpage>. <pub-id pub-id-type="doi">10.1080/15476286.2019.1634965</pub-id> <pub-id pub-id-type="pmid">31232162</pub-id></citation></ref>
<ref id="B93"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tian</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>R. R.</given-names></name> <name><surname>Xian</surname> <given-names>W. D.</given-names></name> <name><surname>Xiong</surname> <given-names>M.</given-names></name> <name><surname>Xiao</surname> <given-names>M.</given-names></name> <name><surname>Li</surname> <given-names>W. J.</given-names></name></person-group> (<year>2020</year>). <article-title>A novel thermal Cas12b from a hot spring bacterium with high target mismatch tolerance and robust DNA cleavage efficiency.</article-title> <source><italic>Int. J. Biol. Macromol.</italic></source> <volume>147</volume> <fpage>376</fpage>&#x2013;<lpage>384</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijbiomac.2020.01.079</pub-id> <pub-id pub-id-type="pmid">31926228</pub-id></citation></ref>
<ref id="B94"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tyson</surname> <given-names>G. W.</given-names></name> <name><surname>Banfield</surname> <given-names>J. F.</given-names></name></person-group> (<year>2008</year>). <article-title>Rapidly evolving CRISPRs implicated in acquired resistance of microorganisms to viruses.</article-title> <source><italic>Environ. Microbiol.</italic></source> <volume>10</volume> <fpage>200</fpage>&#x2013;<lpage>207</lpage>. <pub-id pub-id-type="doi">10.1111/j.1462-2920.2007.01444.x</pub-id> <pub-id pub-id-type="pmid">17894817</pub-id></citation></ref>
<ref id="B95"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vale</surname> <given-names>P. F.</given-names></name> <name><surname>Lafforgue</surname> <given-names>G.</given-names></name> <name><surname>Gatchitch</surname> <given-names>F.</given-names></name> <name><surname>Gardan</surname> <given-names>R.</given-names></name> <name><surname>Moineau</surname> <given-names>S.</given-names></name> <name><surname>Gandon</surname> <given-names>S.</given-names></name></person-group> (<year>2015</year>). <article-title>Costs of CRISPR-Cas-mediated resistance in Streptococcus thermophilus.</article-title> <source><italic>Proc. R. Soc. B</italic></source> <volume>282</volume>:<issue>20151270</issue>. <pub-id pub-id-type="doi">10.1098/rspb.2015.1270</pub-id> <pub-id pub-id-type="pmid">26224708</pub-id></citation></ref>
<ref id="B96"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Van Houte</surname> <given-names>S.</given-names></name> <name><surname>Ekroth</surname> <given-names>A. K. E.</given-names></name> <name><surname>Broniewski</surname> <given-names>J. M.</given-names></name> <name><surname>Chabas</surname> <given-names>H.</given-names></name> <name><surname>Ashby</surname> <given-names>B.</given-names></name> <name><surname>Bondy-Denomy</surname> <given-names>J.</given-names></name><etal/></person-group> (<year>2016</year>). <article-title>The diversity-generating benefits of a prokaryotic adaptive immune system.</article-title> <source><italic>Nature</italic></source> <volume>532</volume> <fpage>385</fpage>&#x2013;<lpage>388</lpage>. <pub-id pub-id-type="doi">10.1038/nature17436</pub-id> <pub-id pub-id-type="pmid">27074511</pub-id></citation></ref>
<ref id="B97"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weinberger</surname> <given-names>A. D.</given-names></name> <name><surname>Wolf</surname> <given-names>Y. I.</given-names></name> <name><surname>Lobkovsky</surname> <given-names>A. E.</given-names></name> <name><surname>Gilmore</surname> <given-names>M. S.</given-names></name> <name><surname>Koonin</surname> <given-names>E. V.</given-names></name></person-group> (<year>2012</year>). <article-title>Viral diversity threshold for adaptive immunity in prokaryotes.</article-title> <source><italic>mBio</italic></source> <volume>3</volume>:<issue>e00456-12</issue>. <pub-id pub-id-type="doi">10.1128/mBio.00456-12</pub-id> <pub-id pub-id-type="pmid">23221803</pub-id></citation></ref>
<ref id="B98"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weissman</surname> <given-names>J. L.</given-names></name> <name><surname>Laljani</surname> <given-names>R. M. R.</given-names></name> <name><surname>Fagan</surname> <given-names>W. F.</given-names></name> <name><surname>Johnson</surname> <given-names>P. L. F.</given-names></name></person-group> (<year>2019</year>). <article-title>Visualization and prediction of CRISPR incidence in microbial trait-space to identify drivers of antiviral immune strategy.</article-title> <source><italic>ISME J.</italic></source> <volume>13</volume> <fpage>2589</fpage>&#x2013;<lpage>2602</lpage>. <pub-id pub-id-type="doi">10.1038/s41396-019-0411-2</pub-id> <pub-id pub-id-type="pmid">31239539</pub-id></citation></ref>
<ref id="B99"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Westra</surname> <given-names>E. R.</given-names></name> <name><surname>Dowling</surname> <given-names>A. J.</given-names></name> <name><surname>Broniewski</surname> <given-names>J. M.</given-names></name> <name><surname>van Houte</surname> <given-names>S.</given-names></name></person-group> (<year>2016</year>). <article-title>Evolution and Ecology of CRISPR.</article-title> <source><italic>Annu. Rev. Ecol. Evol. Syst.</italic></source> <volume>47</volume> <fpage>307</fpage>&#x2013;<lpage>331</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-ecolsys-121415-032428</pub-id></citation></ref>
<ref id="B100"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Westra</surname> <given-names>E. R.</given-names></name> <name><surname>van Houte</surname> <given-names>S.</given-names></name> <name><surname>Oyesiku-Blakemore</surname> <given-names>S.</given-names></name> <name><surname>Makin</surname> <given-names>B.</given-names></name> <name><surname>Broniewski</surname> <given-names>J. M.</given-names></name> <name><surname>Best</surname> <given-names>A.</given-names></name><etal/></person-group> (<year>2015</year>). <article-title>Parasite exposure drives selective evolution of constitutive versus inducible defense.</article-title> <source><italic>Curr. Biol.</italic></source> <volume>25</volume> <fpage>1043</fpage>&#x2013;<lpage>1049</lpage>. <pub-id pub-id-type="doi">10.1016/j.cub.2015.01.065</pub-id> <pub-id pub-id-type="pmid">25772450</pub-id></citation></ref>
<ref id="B101"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>R.</given-names></name> <name><surname>Chai</surname> <given-names>B.</given-names></name> <name><surname>Cole</surname> <given-names>J. R.</given-names></name> <name><surname>Gunturu</surname> <given-names>S. K.</given-names></name> <name><surname>Guo</surname> <given-names>X.</given-names></name> <name><surname>Tian</surname> <given-names>R.</given-names></name><etal/></person-group> (<year>2020</year>). <article-title>Targeted assemblies of cas1 suggest CRISPR-Cas&#x2019;s response to soil warming.</article-title> <source><italic>ISME J.</italic></source> <volume>14</volume> <fpage>1651</fpage>&#x2013;<lpage>1662</lpage>. <pub-id pub-id-type="doi">10.1038/s41396-020-0635-1</pub-id> <pub-id pub-id-type="pmid">32221408</pub-id></citation></ref>
<ref id="B102"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zablocki</surname> <given-names>O.</given-names></name> <name><surname>van Zyl</surname> <given-names>L.</given-names></name> <name><surname>Trindade</surname> <given-names>M.</given-names></name></person-group> (<year>2018</year>). <article-title>Biogeography and taxonomic overview of terrestrial hot spring thermophilic phages.</article-title> <source><italic>Extremophiles</italic></source> <volume>22</volume> <fpage>827</fpage>&#x2013;<lpage>837</lpage>. <pub-id pub-id-type="doi">10.1007/s00792-018-1052-5</pub-id> <pub-id pub-id-type="pmid">30121708</pub-id></citation></ref>
</ref-list>
</back>
</article>