<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fgene.2019.00607</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>QS-Net: Reconstructing Phylogenetic Networks Based on Quartet and Sextet</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Tan</surname>
<given-names>Ming</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/615242"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Long</surname>
<given-names>Haixia</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/570151"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liao</surname>
<given-names>Bo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/570131"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Cao</surname>
<given-names>Zhi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yuan</surname>
<given-names>Dawei</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/691253"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tian</surname>
<given-names>Geng</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhuang</surname>
<given-names>Jujuan</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Yang</surname>
<given-names>Jialiang</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/570548"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup><institution>College of Computer Science and Electronic Engineering, Hunan University</institution>, <addr-line>Changsha</addr-line>, <country>China</country></aff>
<aff id="aff2">
<sup>2</sup><institution>School of Information Science and Technology, Hainan Normal University</institution>, <addr-line>Haikou</addr-line>, <country>China</country></aff>
<aff id="aff3">
<sup>3</sup><institution>Geneis (Beijing) Co. Ltd.</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff4">
<sup>4</sup><institution>Department of Mathematics, Dalian Maritime University</institution>, <addr-line>Dalian</addr-line>, <country>China</country></aff>
<aff id="aff5">
<sup>5</sup><institution>Icahn Institute for Genomics and Multiscale Biology, Icahn School of Medicine at Mount Sinai</institution>, <addr-line>New York, NY</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Marco Antoniotti, University of Milano-Bicocca, Italy</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Gianluca Della Vedova, University of Milano-Bicocca, Italy; Mohammed El-Kebir, University of Illinois at Urbana-Champaign, United States</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Bo Liao, <email xlink:href="mailto:dragonbw@163.com">dragonbw@163.com</email>; Zhi Cao, <email xlink:href="mailto:66384436@qq.com">66384436@qq.com</email>; Jialiang Yang, <email xlink:href="mailto:jialiang.yang@mssm.edu">jialiang.yang@mssm.edu</email>
</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Bioinformatics and Computational Biology, a section of the journal Frontiers in Genetics</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work.</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>24</day>
<month>07</month>
<year>2019</year>
</pub-date>
<pub-date pub-type="collection">
<year>2019</year>
</pub-date>
<volume>10</volume>
<elocation-id>607</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>09</month>
<year>2018</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>06</month>
<year>2019</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2019 Tan, Long, Liao, Cao, Yuan, Tian, Zhuang and Yang</copyright-statement>
<copyright-year>2019</copyright-year>
<copyright-holder>Tan, Long, Liao, Cao, Yuan, Tian, Zhuang and Yang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Phylogenetic networks are used to estimate evolutionary relationships among biological entities or taxa involving reticulate events such as horizontal gene transfer, hybridization, recombination, and reassortment. In the past decade, many phylogenetic tree and network reconstruction methods have been proposed. Although they are highly accurate in reconstructing simple to moderately complex reticulate events, their performance decreases when several reticulate events are present simultaneously. In this paper, we propose QS-Net, a phylogenetic network reconstruction method taking advantage of information on the relationship among six taxa. To evaluate the performance of QS-Net, we conducted experiments on three artificial sequence data sets simulated from an evolutionary tree, an evolutionary network involving three reticulate events, and a complex evolutionary network involving five reticulate events. Comparison with popular phylogenetic methods including Neighbor-Joining, Split-Decomposition, Neighbor-Net, and Quartet-Net suggests that QS-Net is comparable with other methods in reconstructing tree-like evolutionary histories, while it outperforms them in reconstructing reticulate events. In addition, we also applied QS-Net to real data, including a bacterial taxonomy data set consisting of 36 bacterial species and the whole genome sequences of 22 H7N9 influenza A viruses. The results indicate that QS-Net is capable of inferring commonly believed bacterial taxonomy and influenza evolution as well as identifying novel reticulate events. The software QS-Net is publicly available at <uri xlink:href="https://github.com/Tmyiri/QS-Net">https://github.com/Tmyiri/QS-Net</uri>.</p>
</abstract>
<kwd-group>
<kwd>phylogenetic network</kwd>
<kwd>reticulate evolution</kwd>
<kwd>sextet</kwd>
<kwd>bacterial taxonomy</kwd>
<kwd>influenza reassortment</kwd>
</kwd-group>
<counts>
<fig-count count="5"/>
<table-count count="5"/>
<equation-count count="8"/>
<ref-count count="41"/>
<page-count count="9"/>
<word-count count="4262"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>A phylogenetic tree is usually used to show the evolutionary history of a set of biological entities or taxa. However, the tree-like topology cannot represent reticulate evolutionary events, such as horizontal gene transfer (HGT), hybridization, recombination, or reassortment, which have been shown to be critical in genotypic diversity, related phenotypes, estimations of evolutionary history, and virus emergence and immune evasion (<xref ref-type="bibr" rid="B9">Fenderson and Bruce, 2008</xref>; <xref ref-type="bibr" rid="B34">Vijaykrishna et al., 2015</xref>; <xref ref-type="bibr" rid="B3">Bastide et al., 2018</xref>). For example, HGT, also known as lateral gene transfer (LGT), promotes the diversification of microorganisms on the evolutionary time scale. This mechanism can change the types and characteristics of bacteria and plays a major role in the genetic diversity of bacteria (<xref ref-type="bibr" rid="B22">Ochman et al., 2000</xref>). In the long run, it may be the dominant force affecting genes in most prokaryotes. Recombination is a major source of genotypic diversity and a core force for the formation of genomes and related phenotypes (<xref ref-type="bibr" rid="B19">Leducq et al., 2017</xref>). Reassortment is responsible for most antigenic shifts of influenza virus (<xref ref-type="bibr" rid="B21">Nelson et al., 2008</xref>). Hybridization has been shown to be the main evolutionary mechanism for plants and some animals (<xref ref-type="bibr" rid="B26">Rieseberg et al., 2000</xref>; <xref ref-type="bibr" rid="B40">Yu et al., 2011</xref>).</p>
<p>A phylogenetic network can serve as an alternative to a phylogenetic tree when the evolutionary history of a sequence set contains reticulate events (<xref ref-type="bibr" rid="B16">Huson et al., 2010</xref>). Generally speaking, phylogenetic networks can be divided into explicit and implicit networks. The implicit phylogenetic networks, such as split networks, are often adopted to illustrate incompatible data and capture conflicting signals in a data set. With the increasing amount of sequencing data, phylogenetic networks have become more and more important in molecular evolution.</p>
<p>Over the past decades, many methods have been proposed for reconstructing phylogenetic trees or networks. The most common type of method reconstructs a network directly from the original character data, usually through a parsimony or maximum-likelihood criterion. Methods in this category include Spectronet (<xref ref-type="bibr" rid="B13">Huber et al., 2002</xref>), maximum pseudo-likelihood estimation (<xref ref-type="bibr" rid="B39">Yu and Nakhleh, 2015</xref>), HGT maximum parsimony (<xref ref-type="bibr" rid="B23">Park et al., 2010</xref>), PhyloNetwork (<xref ref-type="bibr" rid="B29">Sol&#xed;s-Lemus et al., 2017</xref>), inferring phylogenetic networks using PhyloNet (<xref ref-type="bibr" rid="B35">Wen et al., 2018</xref>), and SNaQ (<xref ref-type="bibr" rid="B7">Claudia and C&#xe9;cile, 2016</xref>). However, these methods are inefficient computationally and tend to overestimate the actual number of reticulate events in the evolutionary history (<xref ref-type="bibr" rid="B14">Huelsenbeck, 1995</xref>; <xref ref-type="bibr" rid="B23">Park et al., 2010</xref>). The second widely used method is the distance-based method, which first builds a genetic distance matrix for a taxa set and then reconstructs the phylogenetic network from the distance matrix. Methods in this category include Neighbor-Net (<xref ref-type="bibr" rid="B5">Bryant and Moulton, 2004</xref>), Split-Decomposition (<xref ref-type="bibr" rid="B2">Bandelt and Dress, 1992</xref>), FastME (<xref ref-type="bibr" rid="B20">Lefort et al., 2015</xref>), ASTRID (<xref ref-type="bibr" rid="B32">Vachaspati and Warnow, 2015</xref>), tree-average distances method (<xref ref-type="bibr" rid="B37">Willson, 2013</xref>), and large-scale Neighbor-Joining with NINJA (<xref ref-type="bibr" rid="B36">Wheeler, 2009</xref>). The distance-based methods are very fast compared with character-based methods, but they have a disadvantage in terms of reconstruction accuracy. 
The third kind of methods reconstructs phylogenetic networks from weighted triplets and quartets because they can retain more information than distances. Methods in this category include local maximum likelihood using triplets (<xref ref-type="bibr" rid="B25">Ranwez and Gascuel, 2002</xref>), Quartet-Net (<xref ref-type="bibr" rid="B38">Yang et al., 2013</xref>), tree with strong combinatorial evidence (<xref ref-type="bibr" rid="B4">Berry and Gascuel, 2000</xref>), QNet (<xref ref-type="bibr" rid="B11">Gr&#xfc;newald et al., 2007</xref>), SuperQ (<xref ref-type="bibr" rid="B12">Grunewald et al., 2013</xref>), DistiQue (<xref ref-type="bibr" rid="B28">Sayyari and Mirarab, 2016</xref>), level 1 network from a dense quartet (<xref ref-type="bibr" rid="B17">Keijsper and Pendavingh, 2014</xref>), and weighted QMC (<xref ref-type="bibr" rid="B1">Avni et al., 2015</xref>). In addition, there are other methods using statistical models such as stochastic local search method (<xref ref-type="bibr" rid="B31">Tria et al., 2010</xref>), clusters (<xref ref-type="bibr" rid="B33">Van Iersel et al., 2010</xref>), Bayesian inference (<xref ref-type="bibr" rid="B41">Zhang et al., 2017</xref>), statistical model (<xref ref-type="bibr" rid="B24">Pickrell and Pritchard, 2012</xref>), and Monte Carlo method (<xref ref-type="bibr" rid="B8">Eslahchi et al., 2010</xref>).</p>
<p>Quartet-Net (<xref ref-type="bibr" rid="B38">Yang et al., 2013</xref>) is a method for reconstructing phylogenetic networks from a set of weighted triplets and quartets, which uses parsimony-informative sites to calculate triplet and quartet weights directly from a multiple sequence alignment (MSA). Based on the calculated triplet and quartet weights, Quartet-Net then performs a split expanding process to obtain all full splits and their weights, which are then transformed into an evolutionary tree or network. The method is a generalization of Split-Decomposition (<xref ref-type="bibr" rid="B2">Bandelt and Dress, 1992</xref>). In this paper, we further generalize Quartet-Net and propose a novel method called QS-Net to reconstruct evolutionary networks based on weighted quartets and sextets. The analysis of artificial and real data sets shows that this method can reconstruct a more accurate phylogeny when the sequence data are generated from complicated evolutionary scenarios involving many reticulate events and that it can identify novel reticulate evolution and reassortment events.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="s2_1">
<title>Background: Split and Split Weight</title>
<p>For a taxa set S = {S<sub>1</sub>, S<sub>2</sub>,&#x2026;,S<sub>n</sub>} of size n, a split, denoted by A|B, consists of two disjoint non-empty subsets A and B of S. If A and B together contain all the taxa in S, then A|B is called a full split; otherwise, it is called a partial split. In a phylogenetic tree, each edge is a full split that divides the tree into two parts, while in a phylogenetic network, a group of parallel edges with equal length represents a full split. If |A| = 1 or |B| = 1, the split A|B is called a trivial split. For example, the phylogenetic tree in <xref ref-type="fig" rid="f1">
<bold>Figure 1A</bold>
</xref> contains five trivial full splits, such as a|bcdef, and three non-trivial full splits de|abcf, bc|adef, and ade|bcf. In general, a split A|B with |A| = m and |B| = n is called an m|n split. In addition, W(A|B) represents the evolutionary distance between taxa groups A and B. If A or B contains more than one taxon, then W(A|B) is the distance between the common ancestors of A and B. For example, W(a|de) = 2 and W(d|ae) = 1 in <xref ref-type="fig" rid="f1">
<bold>Figure 1A</bold>
</xref>. Since W(a|d) represents the evolutionary distance between taxa a and d, from <xref ref-type="fig" rid="f1">
<bold>Figure 1A</bold>
</xref> and these definitions we obtain the equation W(a|d) = W(a|de) + W(ae|d).</p>
<fig id="f1" position="float">
<label>Figure 1</label>
<caption>
<p>Phylogenetic tree: a phylogenetic tree for illustration and a phylogenetic tree with 12 leaves. <bold>(A)</bold> A phylogenetic tree for illustration with the branch length indicating evolutionary distance. <bold>(B)</bold> A phylogenetic tree with 12 leaves used to generate the first simulation data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fgene-10-00607-g001.tif"/>
</fig>
<p>For an MSA, a simple parsimony-based method is used to estimate the weights of quartets and sextets. For example, if the character in a site is the same for taxa a, b, and c and for taxa d, e, and f, but different for a and d, then the site is defined to support the split abc | def. For any sextet abc | def, its weight W(abc|def) is defined to be the proportion of sites in the MSA that support it. The weight of a quartet, say ab|cd, is calculated in a similar way. After all the quartet and sextet weights are obtained, an ever-expanding process is performed based on these weights to obtain all full splits and their weights. As shown in the previous literature (<xref ref-type="bibr" rid="B2">Bandelt and Dress, 1992</xref>; <xref ref-type="bibr" rid="B38">Yang et al., 2013</xref>), reconstructing a phylogenetic tree or network is equivalent to calculating all the full splits and their weights. Thus, this process yields the reconstructed tree or network, which can be displayed with the software SplitsTree4 (<xref ref-type="bibr" rid="B15">Huson and Bryant, 2006</xref>).</p>
</sec>
<sec id="s2_2">
<title>Ever-Expanding Process Based on Quartet and Sextet Weights</title>
<p>Analogous to the equation W(a|d) = W(a|de) + W(ae|d), we have the equation W(abc|def) = W(abc|defg) + W(abcg|def), which can be seen as adding a new taxon g to either side of the split abc|def. If W(abc|def) = 0, then W(abc|defg) = 0 and W(abcg|def) = 0. If taxa groups A<sub>1</sub> &#x2286; A and B<sub>1</sub> &#x2286; B, or A<sub>1</sub> &#x2286; B and B<sub>1</sub> &#x2286; A, we say that the split A|B displays A<sub>1</sub>|B<sub>1</sub>. It is proven in <xref ref-type="bibr" rid="B2">Bandelt and Dress (1992)</xref> that W(A|B) &#x2264; W(A<sub>1</sub>|B<sub>1</sub>). Therefore, a split with zero weight cannot be further expanded to larger splits with positive weights.</p>
<p>For a taxa set S with size n, there are <inline-formula><mml:math id="M1" display="inline"><mml:mrow><mml:mn>10</mml:mn><mml:msubsup><mml:mo stretchy="false">(</mml:mo><mml:mn>6</mml:mn><mml:mtext>n</mml:mtext></mml:msubsup><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> sextets. We first calculate the weights of all quartets and sextets from the MSA, and then we expand them to get all full split weights using an ever-expanding process. For a septet of abc|defg type, we have W(abc|defg) = W(abc|def) &#x2212; W(abcg|def), and there is a similar equation for W(abcg|def), so the weight W(abc|defg) can be obtained by repeating such calculations, as follows.</p>
<disp-formula>
<mml:math id="M2" display="block"><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable columnalign="left"><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abc|defg)</mml:mtext><mml:mo>=</mml:mo><mml:mtext>W(abc|def)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(abcg|def)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abcg|def)</mml:mtext><mml:mo>=</mml:mo><mml:mtext>W(abg|def)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(abg|cdef)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abg|cdef)</mml:mtext><mml:mo>=</mml:mo><mml:mtext>W(abg|cde)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(abfg|cde)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abfg|cde)</mml:mtext><mml:mo>=</mml:mo><mml:mtext>W(afg|cde)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(afg|bcde)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(afg|bcde)</mml:mtext><mml:mo>=</mml:mo><mml:mtext>W(afg|bcd)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(aefg|bcd)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(aefg|bcd)</mml:mtext><mml:mo>=</mml:mo><mml:mtext>W(efg|bcd)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(efg|abcd)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(efg|abcd)</mml:mtext><mml:mo>=</mml:mo><mml:mtext>W(efg|abc)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(defg|abc)</mml:mtext></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>Combining the above equations, we have</p>
<disp-formula>
<label>(1)</label>
<mml:math id="M3" display="block"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mtext>W</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>c</mml:mi><mml:mo>|</mml:mo><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>f</mml:mi><mml:mi>g</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mstyle scriptlevel="+1"><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mstyle><mml:mo>{</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>c</mml:mi><mml:mo>|</mml:mo><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>f</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>g</mml:mi><mml:mo>|</mml:mo><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>f</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>g</mml:mi><mml:mo>|</mml:mo><mml:mtext>cde</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mtext>W</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>f</mml:mi><mml:mi>g</mml:mi><mml:mo>|</mml:mo><mml:mi>c</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:mtext>W</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>f</mml:mi><mml:mi>g</mml:mi><mml:mo>|</mml:mo><mml:mi>b</mml:mi><mml:mi>c</mml:mi><mml:mi>d</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>e</mml:mi><mml:mi>f</mml:mi><mml:mi>g</mml:mi><mml:mo>|</mml:mo><mml:mi>b</mml:mi><mml:mi>c</mml:mi><mml:mi>d</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:mi>W</mml:mi><mml:mo 
stretchy="false">(</mml:mo><mml:mi>e</mml:mi><mml:mi>f</mml:mi><mml:mi>g</mml:mi><mml:mo>|</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>}</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math>
</disp-formula>
<p>For |B| &#x2265; 4, taking minimum over all possible cases, we have</p>
<disp-formula>
<label>(2)</label>
<mml:math id="M4" display="block"><mml:mrow><mml:mtable columnalign="left"><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>c</mml:mi><mml:mtext>|B</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>max</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mstyle scriptlevel="+1"><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mstyle><mml:munder><mml:mrow><mml:mi>min</mml:mi><mml:mo>&#x2061;</mml:mo></mml:mrow><mml:mrow><mml:mtext>defg&#x2009;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#x2009;B</mml:mtext></mml:mrow></mml:munder><mml:mo stretchy="false">{</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>c</mml:mi><mml:mtext>|</mml:mtext><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>f</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>g</mml:mi><mml:mtext>|</mml:mtext><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>f</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:mrow></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>g</mml:mi><mml:mtext>|cde</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mtext>W</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>f</mml:mi><mml:mi>g</mml:mi><mml:mtext>|</mml:mtext><mml:mi>c</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mrow><mml:mrow><mml:mtext>W</mml:mtext><mml:mo 
stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>f</mml:mi><mml:mi>g</mml:mi><mml:mtext>|bcd</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mtext>W(efg|bcd)</mml:mtext><mml:mo>+</mml:mo><mml:mtext>W(efg|abc)},0</mml:mtext></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>When |A|=4 and |B|=4, the weight of the 4|4 split is</p>
<disp-formula>
<label>(3)</label>
<mml:math id="M5" display="block"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mtext>W</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mtext>A|B</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>min</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:munder><mml:mrow><mml:mi>min</mml:mi><mml:mo>&#x2061;</mml:mo></mml:mrow><mml:mrow><mml:mtext>a&#x2009;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#x2009;A</mml:mtext></mml:mrow></mml:munder><mml:mo stretchy="false">{</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mtext>A</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mi>a</mml:mi><mml:mtext>|B</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mtext>A</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mi>a</mml:mi><mml:mtext>|B&#xa0;</mml:mtext><mml:mo>+</mml:mo><mml:mi>a</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;</mml:mtext><mml:munder><mml:mrow><mml:mi>min</mml:mi><mml:mo>&#x2061;</mml:mo></mml:mrow><mml:mrow><mml:mtext>e&#x2009;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#x2009;B</mml:mtext></mml:mrow></mml:munder><mml:mo>{</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mtext>A|B</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>e)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(A+e|B</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>e)</mml:mtext><mml:mo>}</mml:mo><mml:mo>}</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math>
</disp-formula>
<p>where A&#x2212;A&#x2032; for two sets A and A&#x2032; denotes set difference (subtraction).</p>
<p>For example, if A={a, b, c, d} and B={e, f, g, h}, there are eight equations for W(abcd|efgh):</p>
<disp-formula>
<mml:math id="M6" display="block"><mml:mrow><mml:mtext>W(abcd|efgh)</mml:mtext><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable columnalign="left"><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abc|efgh)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(abc|defgh)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abd|efgh)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(abd|cefgh)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(acd|efgh)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(acd|befgh)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(bcd|efgh)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(bcd|aefgh)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abcd|efg)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(abcdh|efg)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abcd|efh)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(abcdg|efh)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abcd|egh)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(abcdf|egh)</mml:mtext></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mtext>W(abcd|fgh)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(abcde|fgh)</mml:mtext></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>For any split A|B with |A| &#x2265; 4 and |B| &#x2265; 4, we traverse the elements in A and B and take out four taxa for each calculation. Suppose a, b, c, d &#x2208; A and e, f, g, h &#x2208; B; then we have</p>
<disp-formula>
<label>(4)</label>
<mml:math id="M7" display="block"><mml:mrow><mml:mtext>W(A|B)</mml:mtext><mml:mo>=</mml:mo><mml:munder><mml:mrow><mml:mi>min</mml:mi><mml:mo>&#x2061;</mml:mo></mml:mrow><mml:mrow><mml:mtext>abcd&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;A;&#xa0;efgh&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;B</mml:mtext></mml:mrow></mml:munder><mml:mo stretchy="false">{</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mi>c</mml:mi><mml:mi>d</mml:mi><mml:mo stretchy="false">|</mml:mo><mml:mi>e</mml:mi><mml:mi>f</mml:mi><mml:mi>g</mml:mi><mml:mi>h</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">}</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>For any 2|n split of ab|B type with c, d, e &#x2208; B, we calculate its weight by formula (5), as in Quartet-Net (<xref ref-type="bibr" rid="B38">Yang et al., 2013</xref>),</p>
<disp-formula>
<label>(5)</label>
<mml:math id="M8" display="block"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mtext>W(ab|B)</mml:mtext><mml:mo>=</mml:mo><mml:mi>max</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mstyle scriptlevel="+1"><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mstyle><mml:munder><mml:mrow><mml:mi>min</mml:mi><mml:mo>&#x2061;</mml:mo></mml:mrow><mml:mrow><mml:mtext>cde&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;B</mml:mtext></mml:mrow></mml:munder><mml:mo stretchy="false">{</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mi>b</mml:mi><mml:mtext>|cd</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mtext>W(ae|cd)</mml:mtext><mml:mo>+</mml:mo></mml:mrow></mml:mrow></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mrow><mml:mrow><mml:mtext>W(ae|bc)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>W(bc|de)+W(ab|de)</mml:mtext><mml:mo>}</mml:mo><mml:mo>,</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Finally, for any trivial split of a|S &#x2212; a type with b, c &#x2208; S&#x2212;a in a taxa set S, we calculate the weight as follows (see also <xref ref-type="bibr" rid="B38">Yang et al., 2013</xref>):</p>
<disp-formula>
<label>(6)</label>
<mml:math id="M9" display="block"><mml:mrow><mml:mtext>W(a|S</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>a)</mml:mtext><mml:mo>=</mml:mo><mml:munder><mml:mrow><mml:mi>min</mml:mi><mml:mo>&#x2061;</mml:mo></mml:mrow><mml:mrow><mml:mtext>bc&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;S</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>a</mml:mtext></mml:mrow></mml:munder><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtext>W(a|bc)</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mstyle displaystyle="true"><mml:msub><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mtext>a&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;A;&#xa0;bc&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;B</mml:mtext></mml:mrow></mml:msub><mml:mrow><mml:mtext>W(A|B)</mml:mtext></mml:mrow></mml:mstyle></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>Formulas (1) &#x2013; (6) are used to calculate all full splits by decomposing sextet weights iteratively.</p>
</sec>
<sec id="s2_3">
<title>The QS-Net Method</title>
<p>QS-Net takes an MSA as input. Suppose that there are n taxa in the taxa set S, which are arranged in the order of 1, 2, 3, &#x2026;, n. In the initialization step, all triplet, quartet, and sextet weights are calculated directly from the MSAs. We calculate the weights of full splits in the following ways.</p>
<list list-type="bullet">
<list-item>
<p>Full split of type A|S &#x2212; A with |A| &#x2265; 3 and |S&#x2212;A| &#x2265; 3: for the first six taxa&#x2014;1, 2, 3, 4, 5, and 6&#x2014;there are 10 sextets. We store these sextets together with their weights in a set X<sub>1</sub>. QS-Net then iteratively adds i = 7, 8, &#x2026;, n to the left and right parts of the splits stored in X<sub>1</sub> and uses equations (2)&#x2013;(4) to calculate the weights of newly generated splits. Noticing that the only splits that cannot be generated in this way are of type i j k|S<sub>1</sub> &#x2212; {i, j, k} with j = i &#x2212; 1, i &#x2212; 2, &#x2026;, 2 and k = j &#x2212; 1, j &#x2212; 2, &#x2026;, 1, we calculate their weights using equation (2) and add them to X<sub>1</sub>. At the end of each iteration, the splits with a weight of zero are removed because they cannot be further expanded to have a positive weight. After the last iteration, all full splits of type |A| &#x2265; 3 and |S &#x2212; A| &#x2265; 3 have been calculated.</p>
</list-item>
<list-item>
<p>2|n &#x2013; 2 full splits: These splits can be calculated using equation (5). In practice, we use Quartet-Net to calculate their split weights.</p>
</list-item>
<list-item>
<p>Trivial (1|n &#x2212; 1) full splits: These splits can be calculated by equation (6).</p>
</list-item>
</list>
<p>By the above procedures, we calculate the weights of all full splits. Similar to <xref ref-type="bibr" rid="B38">Yang et al. (2013)</xref>, it is usually advisable to filter the non-trivial full splits with very low split weights, which tend to be false positives. In practice, we remove splits with weight less than c% of the average weight, where c is a user-defined threshold set to 1 in this study. The output file containing all non-zero full splits and their weights is stored in the NEXUS file format, which can be visualized using SplitsTree4 (<xref ref-type="bibr" rid="B15">Huson and Bryant, 2006</xref>). The time complexity of QS-Net is O(n<sup>10</sup>).</p>
</sec>
</sec>
<sec id="s3">
<title>Results And Discussions</title>
<p>To demonstrate QS-Net, we analyzed three artificial data sets and two real data sets. The artificial data sets were generated from a simple tree phylogeny, a phylogenetic scenario with three reticulate events, and a more complicated phylogenetic scenario with five reticulate events. The purpose is to show that the QS-Net method can accurately reconstruct all kinds of evolutionary histories from simple to complicated ones. The real data include a bacterial taxonomy data consisting of 36 bacterial species and the whole genome sequences of 22 H7N9 influenza A viruses downloaded from NCBI influenza database.</p>
<p>The software Dawg (<xref ref-type="bibr" rid="B6">Cartwright, 2005</xref>) with model GTR + Gamma + I was used to generate three artificial data sets. The substitution rate is 0.01; the sequence length of the tree is 10,000 bp; the sequence length of the network containing three evolutionary events is 80,000 bp, while the sequence length of the network containing five evolutionary events is 320,000 bp because they are a concatenation of eight and 32 feasible trees, respectively. To avoid randomness, we performed 100 Dawg runs on each of the three artificial data sets and applied the 100 MSAs of each data set to QS-Net together with four other popular methods: Quartet-Net (<xref ref-type="bibr" rid="B38">Yang et al., 2013</xref>), Neighbor-Net (<xref ref-type="bibr" rid="B5">Bryant and Moulton, 2004</xref>), Split-Decomposition (<xref ref-type="bibr" rid="B2">Bandelt and Dress, 1992</xref>), and Neighbor-Joining (<xref ref-type="bibr" rid="B27">Saitou and Nei, 1987</xref>).</p>
<sec id="s3_1">
<title>Analysis on the Tree Data</title>
<p>The tree data were generated from <xref ref-type="fig" rid="f1">
<bold>Figure 1B</bold>
</xref> with 12 leaves. For brevity, we only listed reconstructed taxa set in the left or right block containing fewer number of taxa (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material: Table S1</bold>
</xref>). For example, split bd|acefghijkl was listed as bd. We then normalized each split by the weight of a split successfully constructed by all methods. All trivial full splits were not listed because they can be successfully reconstructed by all five methods. As shown in <xref ref-type="table" rid="T1">
<bold>Table 1</bold>
</xref>, all five methods can successfully reconstruct all full splits in the 100 runs of the tree data; the accuracy is equal to the experimental bootstrap value divided by the real bootstrap value. The true-positive split result represents all splits in the real phylogenetic history of the simulated data sets. We listed the number of true-positive splits obtained by the five methods on all simulated data sets in <xref ref-type="table" rid="T2">
<bold>Table 2</bold>
</xref>. If a method can reconstruct the true-positive split at least once in 100 runs, we determined that the true-positive split can be obtained by this method. In addition to true-positive results, other split results reconstructed by the method are false-positive splits, which typically have very low weights. Except for Neighbor-Joining, the other four methods reconstructed some false-positive splits (here we only list false-positive splits with a bootstrap value &#x2265;10). For example, Quartet-Net and QS-Net reconstruct two additional splits, al and ae, with bootstrap values of 10 and 26, respectively (see <xref ref-type="table" rid="T3">
<bold>Table 3</bold>
</xref>). This is because QS-Net and Quartet-Net methods use the same calculation formula for split of 2|n type. Neighbor-Net identifies 35 false-positive splits with bootstrap value ranging from 10 to 40. These false-positive splits may be caused by some random mutations in the tree data set.</p>
<table-wrap id="T1" position="float">
<label>Table 1</label>
<caption>
<p>Comparison of accuracy (the total bootstrap value obtained from the experimental results divided by the real bootstrap value) between QS-Net and four other methods.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top">Data set</th>
<th valign="top">QS-Net</th>
<th valign="top">Quartet-Net</th>
<th valign="top">Neighbor-Net</th>
<th valign="top">Split-Decomposition</th>
<th valign="top">Neighbor-Joining</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top">Tree</td>
<td valign="top">100%</td>
<td valign="top">100%</td>
<td valign="top">100%</td>
<td valign="top">100%</td>
<td valign="top">100%</td>
</tr>
<tr>
<td valign="top">Network (3)</td>
<td valign="top">100%</td>
<td valign="top">100%</td>
<td valign="top">70.16%</td>
<td valign="top">67.24%</td>
<td valign="top">36%</td>
</tr>
<tr>
<td valign="top">Network (5)</td>
<td valign="top">100%</td>
<td valign="top">94.74%</td>
<td valign="top">58.89%</td>
<td valign="top">46.76%</td>
<td valign="top">23.68%</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Network (3) is the phylogenetic network with three reticulate events, while Network (5) is the phylogenetic network with five reticulate events.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T2" position="float">
<label>Table 2</label>
<caption>
<p>The number of true-positive results obtained by five methods.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top">Data set</th>
<th valign="top">True</th>
<th valign="top">QS-Net</th>
<th valign="top">Quartet-Net</th>
<th valign="top">Neighbor-Net</th>
<th valign="top">Split-Decomposition</th>
<th valign="top">Neighbor-Joining</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top">Tree</td>
<td valign="top">9</td>
<td valign="top">9</td>
<td valign="top">9</td>
<td valign="top">9</td>
<td valign="top">9</td>
<td valign="top">9</td>
</tr>
<tr>
<td valign="top">Network (3)</td>
<td valign="top">25</td>
<td valign="top">25</td>
<td valign="top">25</td>
<td valign="top">21</td>
<td valign="top">23</td>
<td valign="top">9</td>
</tr>
<tr>
<td valign="top">Network (5)</td>
<td valign="top">38</td>
<td valign="top">38</td>
<td valign="top">36</td>
<td valign="top">30</td>
<td valign="top">22</td>
<td valign="top">11</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>The &#x201c;True&#x201d; column represents the real number of true-positive splits of the simulated data.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T3" position="float">
<label>Table 3</label>
<caption>
<p>The number of false-positive results obtained by five methods.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top">Data set</th>
<th valign="top">QS-Net</th>
<th valign="top">Quartet-Net</th>
<th valign="top">Neighbor-Net</th>
<th valign="top">Split-Decomposition</th>
<th valign="top">Neighbor-Joining</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top">Tree</td>
<td valign="top">2</td>
<td valign="top">2</td>
<td valign="top">35</td>
<td valign="top">4</td>
<td valign="top">0</td>
</tr>
<tr>
<td valign="top">Network (3)</td>
<td valign="top">4</td>
<td valign="top">4</td>
<td valign="top">16</td>
<td valign="top">1</td>
<td valign="top">0</td>
</tr>
<tr>
<td valign="top">Network (5)</td>
<td valign="top">4</td>
<td valign="top">4</td>
<td valign="top">4</td>
<td valign="top">1</td>
<td valign="top">0</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<title>Analysis on the Network Data with Three Reticulate Events</title>
<p>The network data were generated from <xref ref-type="fig" rid="f2">
<bold>Figure 2A</bold>
</xref> containing three reticulate events A, B, and C, which can be decomposed into eight feasible underlying trees. A feasible tree can be obtained by cutting off one branch respectively at A, B, and C. For example, we can get an underlying tree by cutting off the three edges qA, mB, and oC in the three reticulate events. The sequence data of a taxon m were generated by concatenating partial sequence data from q and partial sequence data from r. All true splits and splits reconstructed by the five methods are listed in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material: Table S2</bold>
</xref>. The weight of the true split is the sum of the split weights in eight feasible trees. Similarly, we normalized each split with the weight of split ab and multiplied it by 4. As can be seen from the <xref ref-type="table" rid="T1">
<bold>Table 1</bold>
</xref>, QS-Net and Quartet-Net accurately reconstructed all true splits in all 100 runs, while Neighbor-Net, Split-Decomposition, and Neighbor-Joining failed to reconstruct a large number of true splits. For example, Neighbor-Net failed to reconstruct split gh, fgi, and fgh in more than 90 runs, and Split-Decomposition was unable to reconstruct split bce and bcde in all 100 runs (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material: Table S2</bold>
</xref>). Neighbor-Joining obtained an even worse result with 16 true splits missing, which is reasonable because Neighbor-Joining only reconstructs trees and retains the strongest compatible splits.</p>
<fig id="f2" position="float">
<label>Figure 2</label>
<caption>
<p>Phylogenetic network with 3/5 reticulate events. <bold>(A)</bold> A phylogenetic network with three reticulate events A, B, and C. <bold>(B)</bold> A phylogenetic network with five reticulate events A, B, C, D, and E.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fgene-10-00607-g002.tif"/>
</fig>
</sec>
<sec id="s3_3">
<title>Analysis on the Network Data with Five Reticulate Events</title>
<p>
<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material Table S3</bold>
</xref> lists all true splits and splits reconstructed from the five methods from the network data. The data set was generated from <xref ref-type="fig" rid="f2">
<bold>Figure 2B</bold>
</xref> with a complicated phylogenetic scenario containing five reticulate events. Similarly, the weight of the true split is the sum of the weights of the splits in 32 feasible trees. We normalized each split with the weight of split ce. As can be seen from the <xref ref-type="table" rid="T1">
<bold>Table 1</bold>
</xref>, only QS-Net method obtains 100% accuracy in all 100 runs, while the other four methods fail to reconstruct some splits in most runs. For example, Quartet-Net failed in reconstructing split fgi and afg in all 100 runs. In addition to the two splits, Neighbor-Net also cannot reconstruct split hj, bcd, and bcde in more than 90 runs (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material: Table S3</bold>
</xref>), which happens because Neighbor-Net reduces splits to make the split system planar. Split-Decomposition and Neighbor-Joining still performed poorly. In addition, all methods except for Neighbor-Joining reconstructed some false-positive splits.</p>
</sec>
<sec id="s3_4">
<title>Analysis on the Bacterial Data</title>
<p>The bacterial data set was used in <xref ref-type="bibr" rid="B30">Takahashi and Kryukov (2009)</xref> for the analysis of phylogenetic relationships among bacterial species. This data set consists of 36 bacterial genomes containing concatenated sequences of seven genes (16S rRNA, 23S rRNA, gyrB, pyrH, recA, rpoA, and rpoD). The 36 species were divided into three different groups based on different GC content (32&#x2013;38%, 50&#x2013;53%, and 64&#x2013;69%), containing 14, 11, and 11 species, respectively. We took the GC-rich data set consisting of 11 bacterial species and a data set of 25 species containing both GC-poor and GC-rich bacteria. The MSAs of both data sets were generated by ClustalW (<xref ref-type="bibr" rid="B18">Larkin et al., 2007</xref>) and then fed into QS-Net, Quartet-Net (<xref ref-type="bibr" rid="B38">Yang et al., 2013</xref>), Neighbor-Net (<xref ref-type="bibr" rid="B5">Bryant and Moulton, 2004</xref>), Split-Decomposition (<xref ref-type="bibr" rid="B2">Bandelt and Dress, 1992</xref>), and Neighbor-Joining (<xref ref-type="bibr" rid="B27">Saitou and Nei, 1987</xref>). We ran the program on an MSI laptop with 2.8-GHz processor and 8-GB memory. A comparison of runtime between QS-Net and Quartet-Net on all data sets is shown in <xref ref-type="table" rid="T4">
<bold>Table 4</bold>
</xref>; the time statistics for three artificial data sets are the average of all 100 runtimes. The Neighbor-Joining method has the least runtime, and all other three methods can produce results in less than 2 s on all data sets. The reconstructed results were then viewed by SplitsTree4 (<xref ref-type="bibr" rid="B15">Huson and Bryant, 2006</xref>). Only three split networks reconstructed by QS-Net and Quartet-Net method on bacterial data set are shown in <xref ref-type="fig" rid="f3">
<bold>Figures 3</bold>
</xref> and <xref ref-type="fig" rid="f4">
<bold>4</bold>
</xref>.</p>
<table-wrap id="T4" position="float">
<label>Table 4</label>
<caption>
<p>A comparison of runtime between QS-Net and Quartet-Net on all data sets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top">Method</th>
<th valign="top">Tree</th>
<th valign="top">Network (3)</th>
<th valign="top">Network (5)</th>
<th valign="top">GC rich</th>
<th valign="top">GC poor and rich</th>
<th valign="top">Influenza</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top">QS-Net</td>
<td valign="top">1.25&#xa0;s</td>
<td valign="top">6.02&#xa0;s</td>
<td valign="top">24.39&#xa0;s</td>
<td valign="top">0.92&#xa0;s</td>
<td valign="top">9.49&#xa0;min</td>
<td valign="top">3.22&#xa0;min</td>
</tr>
<tr>
<td valign="top">Quartet-Net</td>
<td valign="top">0.20&#xa0;s</td>
<td valign="top">1.05&#xa0;s</td>
<td valign="top">4.05&#xa0;s</td>
<td valign="top">0.19&#xa0;s</td>
<td valign="top">10.17&#xa0;s</td>
<td valign="top">4.54&#xa0;s</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f3" position="float">
<label>Figure 3</label>
<caption>
<p>The reconstructed QS-Net network of 11 GC-rich bacteria.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fgene-10-00607-g003.tif"/>
</fig>
<fig id="f4" position="float">
<label>Figure 4</label>
<caption>
<p>The reconstructed network on 25 GC-poor or GC-rich bacteria. <bold>(A)</bold> The reconstructed QS-Net network of 25 GC-poor or GC-rich bacteria. <bold>(B)</bold> The reconstructed Quartet-Net network of the 25 bacteria.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fgene-10-00607-g004.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="f3">
<bold>Figure 3</bold>
</xref> shows the phylogenetic network of 11 GC-rich bacterial sequence data set by using QS-Net, which is basically consistent with the experimental results in <xref ref-type="bibr" rid="B30">Takahashi and Kryukov (2009)</xref>. The reconstructed networks of 25 GC-poor or GC-rich (32&#x2013;38% and 64&#x2013;69%) sequence data set reconstructed by QS-Net and Quartet-Net are shown in <xref ref-type="fig" rid="f4">
<bold>Figures 4A, B</bold>
</xref>, respectively. As can be seen from the figures, the differences between QS-Net and Quartet-Net are quite obvious. There are two distinct parallelograms that represent the reticulate evolution event in the reconstructed network in <xref ref-type="fig" rid="f4">
<bold>Figure 4A</bold>
</xref> but not in <xref ref-type="fig" rid="f4">
<bold>Figure 4B</bold>
</xref>, which might be neglected by Quartet-Net due to its inability to identify complicated reticulate events. The numbers of full splits reconstructed by the five methods on bacterial data set and the influenza data set are also listed in <xref ref-type="table" rid="T5">
<bold>Table 5</bold>
</xref>. QS-Net constructs a moderate total number of splits among all comparison methods, probably because the full resolution of taxa is not achieved. In the GC-rich data set, Neighbor-Net constructs three more splits than does QS-Net, while in the GC-poor and GC-rich data set, Neighbor-Net constructs 29 more splits than does QS-Net. In addition, by comparing <xref ref-type="fig" rid="f3">
<bold>Figures 3</bold>
</xref> and <xref ref-type="fig" rid="f4">
<bold>4A</bold>
</xref>, it can be found that GC content may have an important influence on the evolutionary history of bacteria.</p>
<table-wrap id="T5" position="float">
<label>Table 5</label>
<caption>
<p>The number of full splits reconstructed by five methods on bacterial data set and the influenza data set.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top">Data set</th>
<th valign="top">QS-Net</th>
<th valign="top">Quartet-Net</th>
<th valign="top">Neighbor-Net</th>
<th valign="top">Split-Decomposition</th>
<th valign="top">Neighbor-Joining</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top">GC rich</td>
<td valign="top">26</td>
<td valign="top">22</td>
<td valign="top">29</td>
<td valign="top">23</td>
<td valign="top">19</td>
</tr>
<tr>
<td valign="top">GC poor and rich</td>
<td valign="top">48</td>
<td valign="top">45</td>
<td valign="top">77</td>
<td valign="top">48</td>
<td valign="top">47</td>
</tr>
<tr>
<td valign="top">Influenza</td>
<td valign="top">47</td>
<td valign="top">45</td>
<td valign="top">68</td>
<td valign="top">36</td>
<td valign="top">41</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_5">
<title>Analysis on the Influenza Data</title>
<p>The data set consisted of the full genome sequence of 22 H7N9 influenza A viruses aligned by ClustalW (<xref ref-type="bibr" rid="B18">Larkin et al., 2007</xref>). These viruses have major relations with the H7N9 virus (<xref ref-type="bibr" rid="B10">Gao et al., 2013</xref>) that appeared in China in 2013, which caused human mortality. We estimated the phylogenetic relationships of these 22 influenza A viruses using Quartet-Net and QS-Net. The results are shown in <xref ref-type="fig" rid="f5">
<bold>Figures 5A, B</bold>
</xref>, respectively. <xref ref-type="table" rid="T5">
<bold>Table 5</bold>
</xref> lists the numbers of full splits reconstructed by the five methods on bacterial data set and the influenza data set. General split networks do not actually represent explicit evolutionary events, which makes the interpretation and comparison of reconstruction methods on real data sets difficult. So we list the number of splits built by various methods. As can be seen in <xref ref-type="table" rid="T5">
<bold>Table 5</bold>
</xref>, QS-Net reconstructs 47 full splits, while Quartet-Net reconstructs 45 full splits.</p>
<fig id="f5" position="float">
<label>Figure 5</label>
<caption>
<p>The reconstructed network on influenza data. <bold>(A)</bold> The reconstructed Quartet-Net network related to H7N9 influenza A viruses. <bold>(B)</bold> The reconstructed QS-Net network related to H7N9 influenza A viruses.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fgene-10-00607-g005.tif"/>
</fig>
<p>The three viruses that caused human death (A/Shanghai/1/2013, A/Shanghai/2/2013, and A/Anhui/1/2013) were combined. The phylogenetic network indicates that these H7N9 viruses may be derived from the reassortment from influenza subtypes, including avian-origin H7N9 viruses, H9N2 viruses, and H7N3 viruses. In <xref ref-type="fig" rid="f5">
<bold>Figure 5B</bold>
</xref> (constructed by QS-Net), the internal region surrounded by H7N9, H7N7, and H7N3 is more complex than <xref ref-type="fig" rid="f5">
<bold>Figure 5A</bold>
</xref> (constructed by Quartet-Net), which indicates that the true evolutionary history of H7N9 influenza A viruses is very complex. Of course, the real evolutionary history is unknown, but at least the results constructed by QS-Net are consistent with a few previous findings.</p>
</sec>
</sec>
<sec id="s4" sec-type="conclusions">
<title>Conclusions</title>
<p>QS-Net is a method generalizing Quartet-Net. Both simulation studies and real data analyses show that QS-Net has the potential to reconstruct more accurate phylogenetic relationships than its competitors like Quartet-Net and Neighbor-Net. However, the method runs slower than other algorithms, and the major computational difficulty lies in the calculation of 3|4 splits. Nevertheless, the difficulty will be partially resolved with the development of high-speed computers and parallel algorithms. Thus, we believe QS-Net will be useful in identifying more complex reticulate events that will be ignored by other network reconstruction algorithms.</p>
</sec>
<sec id="s5">
<title>Author Contributions</title>
<p>JY and BL conceived the concept of the work and designed the experiments. MT, HL, ZC, and JZ performed literature search. MT, HL, DY, and GT collected and analyzed the data. MT and JY wrote the paper. All authors have approved the final manuscript.</p>
</sec>
<sec id="s6" sec-type="funding-information">
<title>Funding</title>
<p>This work was supported by Hainan Provincial Innovation research team (No. 2019CXTD405), National Natural Science Foundation of China (No. 61762034), Hainan Provincial Natural Science Foundation of China (No. 618MS057, No. 617122), Hainan Provincial major scientific and technological plans (No. ZDKJ2017012), Natural Science Foundation of Hunan, China (Nos. 2018JJ2461 and 2018JJ3568), New Century Excellent Talents in university (No. NCET-10-0365), and National Nature Science Foundation of China (Nos. 11171369, 61272395, 61370171, 61300128, 61472127, 61572178, 61672214, and 61702054).</p>
</sec>
<sec id="s7">
<title>Conflict of Interest Statement</title>
<p>Authors DY and GT were employed by company Geneis (Beijing) Co. Ltd. The remaining authors declare no competing interests.</p>
</sec>
</body>
<back>
<sec sec-type="supplementary-material" id="s8">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2019.00607/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2019.00607/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table_1.docx" id="SM1" mimetype="application/docx"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Avni</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Cohen</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Snir</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Weighted quartets phylogenetics</article-title>. <source>Syst. Biol.</source> <volume>64</volume> (<issue>2</issue>), <fpage>233</fpage>&#x2013;<lpage>242</lpage>. doi: <pub-id pub-id-type="doi">10.1093/sysbio/syu087</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bandelt</surname> <given-names>H. J.</given-names>
</name>
<name>
<surname>Dress</surname> <given-names>A. W. M.</given-names>
</name>
</person-group> (<year>1992</year>). <article-title>Split decomposition: a new and useful approach to phylogenetic analysis of distance data</article-title>. <source>Mol. Phylogenet. Evol.</source> <volume>1</volume> (<issue>3</issue>), <fpage>242</fpage>&#x2013;<lpage>252</lpage>. doi: <pub-id pub-id-type="doi">10.1016/1055-7903(92)90021-8</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bastide</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Sol&#xed;s-Lemus</surname> <given-names>C.</given-names>
</name>
<name>
<surname>An&#xe9;</surname> <given-names>C.</given-names>
</name>
<name>
<surname>William Sparks</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Kriebel</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Phylogenetic comparative methods on phylogenetic networks with reticulations</article-title>. <source>Syst. Biol.</source> <volume>67</volume> (<issue>5</issue>), <fpage>800</fpage>&#x2013;<lpage>820</lpage>. doi: <pub-id pub-id-type="doi">10.1093/sysbio/syy033</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Berry</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Gascuel</surname> <given-names>O.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Inferring evolutionary trees with strong combinatorial evidence</article-title>. <source>Theor. Comput. Sci.</source> <volume>240</volume> (<issue>2</issue>), <fpage>271</fpage>&#x2013;<lpage>298</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0304-3975(99)00235-2</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bryant</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Moulton</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Neighbor-Net: an agglomerative method for the construction of phylogenetic networks</article-title>. <source>Mol. Biol. Evol.</source> <volume>21</volume> (<issue>2</issue>), <fpage>255</fpage>&#x2013;<lpage>265</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msh018</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cartwright</surname> <given-names>R. A.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>DNA assembly with gaps (Dawg): simulating sequence evolution</article-title>. <source>Bioinformatics</source> <volume>21</volume> <supplement>Suppl 3</supplement> (<issue>Suppl_3</issue>), <fpage>iii31</fpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/bti1200</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Claudia</surname> <given-names>S. L.</given-names>
</name>
<name>
<surname>C&#xe9;cile</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Inferring phylogenetic networks with maximum pseudolikelihood under incomplete lineage sorting</article-title>. <source>PLoS Genet.</source> <volume>12</volume> (<issue>3</issue>), <elocation-id>e1005896</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pgen.1005896</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eslahchi</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Habibi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hassanzadeh</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Mottaghi</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>MC-Net: a method for the construction of phylogenetic networks based on the Monte-Carlo method</article-title>. <source>BMC Evol. Biol.</source> <volume>10</volume> (<issue>1</issue>), <fpage>254</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2148-10-254</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">Fenderson and Bruce, (<year>2008</year>). <article-title>Molecular biology of the cell, 5th Edition</article-title>. <source>Shock</source> <volume>30</volume> (<issue>1</issue>), <fpage>100</fpage>. doi: <pub-id pub-id-type="doi">10.1097/01.shk.0000286288.33338.f6</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>W.</given-names>
</name>
<etal/>
</person-group>. (<year>2013</year>). <article-title>Human infection with a novel avian-origin influenza A (H7N9) virus</article-title>. <source>N. Engl. J. Med.</source> <volume>368</volume> (<issue>20</issue>), <fpage>1888</fpage>&#x2013;<lpage>1897</lpage>. doi: <pub-id pub-id-type="doi">10.1056/NEJMoa1304459</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gr&#xfc;newald</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Forslund</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Dress</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Moulton</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>QNet: an agglomerative method for the construction of phylogenetic networks from weighted quartets</article-title>. <source>Mol. Biol. Evol.</source> <volume>24</volume> (<issue>2</issue>), <fpage>532</fpage>&#x2013;<lpage>538</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msl180</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gr&#xfc;newald</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Spillner</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bastkowski</surname> <given-names>S.</given-names>
</name>
<name>
<surname>B&#xf6;gershausen</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Moulton</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>SuperQ: computing supernetworks from quartets</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>10</volume> (<issue>1</issue>), <fpage>151</fpage>&#x2013;<lpage>160</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TCBB.2013.8</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huber</surname> <given-names>K. T.</given-names>
</name>
<name>
<surname>Langton</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Penny</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Moulton</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Hendy</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Spectronet: a package for computing spectra and median networks</article-title>. <source>Appl. Bioinformatics</source> <volume>1</volume> (<issue>3</issue>), <fpage>159</fpage>&#x2013;<lpage>161</lpage>. doi: <pub-id pub-id-type="doi">10.1385/ABAB:97:1:01</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huelsenbeck</surname> <given-names>J. P.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Performance of phylogenetic methods in simulation</article-title>. <source>Syst. Biol.</source> <volume>44</volume> (<issue>1</issue>), <fpage>17</fpage>&#x2013;<lpage>48</lpage>. doi: <pub-id pub-id-type="doi">10.1093/sysbio/44.1.17</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huson</surname> <given-names>D. H.</given-names>
</name>
<name>
<surname>Bryant</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Application of phylogenetic networks in evolutionary studies</article-title>. <source>Mol. Biol. Evol.</source> <volume>23</volume> (<issue>2</issue>), <fpage>254</fpage>&#x2013;<lpage>267</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msj030</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Huson</surname> <given-names>D. H.</given-names>
</name>
<name>
<surname>Rupp</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Scornavacca</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2010</year>). <source>Phylogenetic networks: concepts, algorithms and applications</source>. <publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>. doi: <pub-id pub-id-type="doi">10.1017/CBO9780511974076</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Keijsper</surname> <given-names>J. C. M.</given-names>
</name>
<name>
<surname>Pendavingh</surname> <given-names>R. A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Reconstructing a phylogenetic level-1 network from quartets</article-title>. <source>Bull. Math. Biol.</source> <volume>76</volume> (<issue>10</issue>), <fpage>2517</fpage>&#x2013;<lpage>2541</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11538-014-0022-z</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Larkin</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Blackshields</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Brown</surname> <given-names>N. P.</given-names>
</name>
<name>
<surname>Chenna</surname> <given-names>R.</given-names>
</name>
<name>
<surname>McGettigan</surname> <given-names>P. A.</given-names>
</name>
<name>
<surname>McWilliam</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2007</year>). <article-title>Clustal W and Clustal X version 2.0</article-title>. <source>Bioinformatics</source> <volume>23</volume> (<issue>21</issue>), <fpage>2947</fpage>&#x2013;<lpage>2948</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btm404</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leducq</surname> <given-names>J.-B.</given-names>
</name>
<name>
<surname>Henault</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Charron</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Nielly-Thibault</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Terrat</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Fiumera</surname> <given-names>H. L.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Mitochondrial recombination and introgression during speciation by hybridization</article-title>. <source>Mol. Biol. Evol.</source> <volume>34</volume> (<issue>8</issue>), <fpage>1947</fpage>&#x2013;<lpage>1959</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msx139</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lefort</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Desper</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Gascuel</surname> <given-names>O.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>FastME 2.0: a comprehensive, accurate, and fast distance-based phylogeny inference program</article-title>. <source>Mol. Biol. Evol.</source> <volume>32</volume> (<issue>10</issue>), <fpage>2798</fpage>&#x2013;<lpage>2800</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msv150</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nelson</surname> <given-names>M. I.</given-names>
</name>
<name>
<surname>Viboud</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Simonsen</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Bennett</surname> <given-names>R. T.</given-names>
</name>
<name>
<surname>Griesemer</surname> <given-names>S. B.</given-names>
</name>
<name>
<surname>St. George</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2008</year>). <article-title>Multiple reassortment events in the evolutionary history of H1N1 influenza A virus since 1918</article-title>. <source>PLoS Pathog.</source> <volume>4</volume> (<issue>2</issue>), <elocation-id>e1000012</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.ppat.1000012</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ochman</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Lawrence</surname> <given-names>J. G.</given-names>
</name>
<name>
<surname>Groisman</surname> <given-names>E. A.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Lateral gene transfer and the nature of bacterial innovation</article-title>. <source>Nature</source> <volume>405</volume> (<issue>6784</issue>), <fpage>299</fpage>&#x2013;<lpage>304</lpage>. doi: <pub-id pub-id-type="doi">10.1038/35012500</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Park</surname> <given-names>H. J.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Nakhleh</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Bootstrap-based support of HGT inferred by maximum parsimony</article-title>. <source>BMC Evol. Biol.</source> <volume>10</volume> (<issue>1</issue>), <elocation-id>131</elocation-id>. doi: <pub-id pub-id-type="doi">10.1186/1471-2148-10-131</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pickrell</surname> <given-names>J. K.</given-names>
</name>
<name>
<surname>Pritchard</surname> <given-names>J. K.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Inference of population splits and mixtures from genome-wide allele frequency data</article-title>. <source>PLoS Genet.</source> <volume>8</volume> (<issue>11</issue>), <elocation-id>e1002967</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pgen.1002967</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ranwez</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Gascuel</surname> <given-names>O.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Improvement of distance-based phylogenetic methods by a local maximum likelihood approach using triplets</article-title>. <source>Mol. Biol. Evol.</source> <volume>19</volume> (<issue>11</issue>), <fpage>1952</fpage>&#x2013;<lpage>1963</lpage>. doi: <pub-id pub-id-type="doi">10.1093/oxfordjournals.molbev.a004019</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rieseberg</surname> <given-names>L. H.</given-names>
</name>
<name>
<surname>Baird</surname> <given-names>S. J. E.</given-names>
</name>
<name>
<surname>Gardner</surname> <given-names>K. A.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Hybridization, introgression, and linkage evolution</article-title>. <source>Plant Mol. Biol.</source> <volume>42</volume> (<issue>1</issue>), <fpage>205</fpage>&#x2013;<lpage>224</lpage>. doi: <pub-id pub-id-type="doi">10.1023/A:1006340407546</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saitou</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Nei</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>1987</year>). <article-title>The neighbor-joining method: a new method for reconstructing phylogenetic trees</article-title>. <source>Mol. Biol. Evol.</source> <volume>4</volume> (<issue>4</issue>), <fpage>406</fpage>&#x2013;<lpage>425</lpage>. doi: <pub-id pub-id-type="doi">10.1093/oxfordjournals.molbev.a040454</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sayyari</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Mirarab</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Anchoring quartet-based phylogenetic distances and applications to species tree reconstruction</article-title>. <source>BMC Genomics</source> <volume>17</volume> (<supplement>Suppl 10</supplement>), <fpage>101</fpage>&#x2013;<lpage>113</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s12864-016-3098-z</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sol&#xed;s-Lemus</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Bastide</surname> <given-names>P.</given-names>
</name>
<name>
<surname>An&#xe9;</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>PhyloNetworks: a package for phylogenetic networks</article-title>. <source>Mol. Biol. Evol.</source> <volume>34</volume> (<issue>12</issue>), <fpage>3292</fpage>&#x2013;<lpage>3298</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msx235</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Takahashi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kryukov</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Saitou</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Estimation of bacterial species phylogeny through oligonucleotide frequency distances</article-title>. <source>Genomics</source> <volume>93</volume> (<issue>6</issue>), <fpage>525</fpage>&#x2013;<lpage>533</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ygeno.2009.01.009</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tria</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Caglioti</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Loreto</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Pagnani</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>A stochastic local search algorithm for distance-based phylogeny reconstruction</article-title>. <source>Mol. Biol. Evol.</source> <volume>27</volume> (<issue>11</issue>), <fpage>2587</fpage>&#x2013;<lpage>2595</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msq154</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vachaspati</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Warnow</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>ASTRID: accurate species trees from internode distances</article-title>. <source>BMC Genomics</source> <volume>16</volume> (<supplement>Suppl 10</supplement>), <elocation-id>S3</elocation-id>. doi: <pub-id pub-id-type="doi">10.1186/1471-2164-16-S10-S3</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van Iersel</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Kelk</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Rupp</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Huson</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Phylogenetic networks do not need to be complex: using fewer reticulations to represent conflicting clusters</article-title>. <source>Bioinformatics</source> <volume>26</volume> (<issue>12</issue>), <fpage>i124</fpage>&#x2013;<lpage>i131</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btq202</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vijaykrishna</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Mukerji</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>G. J. D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>RNA virus reassortment: an evolutionary mechanism for host jumps and immune evasion</article-title>. <source>PLoS Pathog.</source> <volume>11</volume> (<issue>7</issue>), <elocation-id>e1004902</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.ppat.1004902</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Nakhleh</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Inferring phylogenetic networks using PhyloNet</article-title>. <source>Syst. Biol.</source> <volume>67</volume> (<issue>4</issue>), <fpage>735</fpage>&#x2013;<lpage>740</lpage>. doi: <pub-id pub-id-type="doi">10.1093/sysbio/syy015</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wheeler</surname> <given-names>T. J.</given-names>
</name>
</person-group> (<year>2009</year>). <source>Large-scale Neighbor-Joining with NINJA</source>. <publisher-loc>Berlin Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>. doi: <pub-id pub-id-type="doi">10.1007/978-3-642-04241-6_31</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Willson</surname> <given-names>S. J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Reconstruction of certain phylogenetic networks from their tree-average distances</article-title>. <source>Bull. Math. Biol.</source> <volume>75</volume> (<issue>10</issue>), <fpage>1840</fpage>&#x2013;<lpage>1878</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11538-013-9872-z</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gr&#xfc;newald</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>X. F.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Quartet-Net: a quartet-based method to reconstruct phylogenetic networks</article-title>. <source>Mol. Biol. Evol.</source> <volume>30</volume> (<issue>5</issue>), <fpage>1206</fpage>&#x2013;<lpage>1217</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/mst040</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Nakhleh</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A maximum pseudo-likelihood approach for phylogenetic networks</article-title>. <source>BMC Genomics</source> <volume>16</volume> (<supplement>Suppl 10</supplement>), <elocation-id>S10</elocation-id>. doi: <pub-id pub-id-type="doi">10.1186/1471-2164-16-S10-S10</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Nakhleh</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Than</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Degnan</surname> <given-names>J. H.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Coalescent histories on phylogenetic networks and detection of hybridization despite incomplete lineage sorting</article-title>. <source>Syst. Biol.</source> <volume>60</volume> (<issue>2</issue>), <fpage>138</fpage>&#x2013;<lpage>149</lpage>. doi: <pub-id pub-id-type="doi">10.1093/sysbio/syq084</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ogilvie</surname> <given-names>H. A.</given-names>
</name>
<name>
<surname>Drummond</surname> <given-names>A. J.</given-names>
</name>
<name>
<surname>Stadler</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Bayesian inference of species networks from multilocus sequence data</article-title>. <source>Mol. Biol. Evol.</source> <volume>35</volume> (<issue>2</issue>), <fpage>504</fpage>&#x2013;<lpage>517</lpage>. doi: <pub-id pub-id-type="doi">10.1093/molbev/msx307</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>