<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="review-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">927312</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2022.927312</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioinformatics</subject>
<subj-group>
<subject>Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A Review of Feature Selection Methods for Machine Learning-Based Disease Risk Prediction</article-title>
<alt-title alt-title-type="left-running-head">Pudjihartono et al.</alt-title>
<alt-title alt-title-type="right-running-head">Feature Selection for Risk Prediction</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Pudjihartono</surname>
<given-names>Nicholas</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1865047/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fadason</surname>
<given-names>Tayaza</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/466538/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kempa-Liehr</surname>
<given-names>Andreas W.</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1569425/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>O&#x27;Sullivan</surname>
<given-names>Justin M.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/30367/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Liggins Institute</institution>, <institution>University of Auckland</institution>, <addr-line>Auckland</addr-line>, <country>New Zealand</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Maurice Wilkins Centre for Molecular Biodiscovery</institution>, <addr-line>Auckland</addr-line>, <country>New Zealand</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Engineering Science</institution>, <institution>The University of Auckland</institution>, <addr-line>Auckland</addr-line>, <country>New Zealand</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>MRC Lifecourse Epidemiology Unit</institution>, <institution>University of Southampton</institution>, <addr-line>Southampton</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Singapore Institute for Clinical Sciences</institution>, <institution>Agency for Science, Technology and Research (A&#x2a;STAR)</institution>, <addr-line>Singapore</addr-line>, <country>Singapore</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Australian Parkinson&#x2019;s Mission</institution>, <institution>Garvan Institute of Medical Research</institution>, <addr-line>Sydney</addr-line>, <addr-line>NSW</addr-line>, <country>Australia</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1491704/overview">Andrea Tangherloni</ext-link>, University of Bergamo, Italy</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/875818/overview">Jin-Xing Liu</ext-link>, Qufu Normal University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/876620/overview">Yongqing Zhang</ext-link>, Chengdu University of Information Technology, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/816662/overview">Yuanyuan Zhang</ext-link>, Qingdao University of Technology, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/759689/overview">Shouheng Tuo</ext-link>, Xi&#x2019;an University of Posts and Telecommunications, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Andreas W. Kempa-Liehr, <email>a.kempa-liehr@auckland.ac.nz</email>; Justin M. O&#x27;Sullivan, <email>justin.osullivan@auckland.ac.nz</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Integrative Bioinformatics, a section of the journal Frontiers in Bioinformatics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>27</day>
<month>06</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>2</volume>
<elocation-id>927312</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>04</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>03</day>
<month>06</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Pudjihartono, Fadason, Kempa-Liehr and O&#x27;Sullivan.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Pudjihartono, Fadason, Kempa-Liehr and O&#x27;Sullivan</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Machine learning has shown utility in detecting patterns within large, unstructured, and complex datasets. One of the promising applications of machine learning is in precision medicine, where disease risk is predicted using patient genetic data. However, creating an accurate prediction model based on genotype data remains challenging due to the so-called &#x201c;curse of dimensionality&#x201d; (i.e., extensively larger number of features compared to the number of samples). Therefore, the generalizability of machine learning models benefits from feature selection, which aims to extract only the most &#x201c;informative&#x201d; features and remove noisy &#x201c;non-informative,&#x201d; irrelevant and redundant features. In this article, we provide a general overview of the different feature selection methods, their advantages, disadvantages, and use cases, focusing on the detection of relevant features (i.e., SNPs) for disease risk prediction.</p>
</abstract>
<kwd-group>
<kwd>machine learning</kwd>
<kwd>feature selection (FS)</kwd>
<kwd>risk prediction</kwd>
<kwd>disease risk prediction</kwd>
<kwd>statistical approaches</kwd>
</kwd-group>
<contract-sponsor id="cn001">University of Auckland<named-content content-type="fundref-id">10.13039/501100001537</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<sec id="s1-1">
<title>1.1 Precision Medicine and Complex Disease Risk Prediction</title>
<p>The advancement of genetic sequencing technology over the last decade has re-ignited interest in precision medicine and the goal of providing healthcare based on a patient&#x2019;s individual genetic features (<xref ref-type="bibr" rid="B108">Spiegel and Hawkins, 2012</xref>). Prediction of complex disease risk (e.g., type 2 diabetes, obesity, cardiovascular diseases, etc&#x2026;) is emerging as an early success story. Successful prediction of individual disease risk has the potential to aid in disease prevention, screening, and early treatment for high-risk individuals (<xref ref-type="bibr" rid="B124">Wray et al., 2007</xref>; <xref ref-type="bibr" rid="B7">Ashley et al., 2010</xref>; <xref ref-type="bibr" rid="B79">Manolio, 2013</xref>).</p>
<p>Genome-wide association studies (GWAS) have identified single nucleotide polymorphisms (SNPs) within the human genome that are associated with complex diseases at the population level (<xref ref-type="bibr" rid="B3">Altshuler et al., 2008</xref>; <xref ref-type="bibr" rid="B32">Donnelly, 2008</xref>; <xref ref-type="bibr" rid="B52">Hindorff et al., 2009</xref>). However, most of the SNPs that have been associated with phenotypes have small effect sizes (<xref ref-type="bibr" rid="B116">Visscher et al., 2017</xref>), and collectively they only explain a fraction of the estimated heritability for each phenotype (<xref ref-type="bibr" rid="B78">Makowsky et al., 2011</xref>). This is known as the <italic>missing heritability</italic> problem. One possible explanation for the missing heritability is that GWAS typically utilize univariate filter techniques (such as the &#x3c7;<sup>2</sup> test) to evaluate each SNP&#x2019;s association with a phenotype separately (<xref ref-type="bibr" rid="B49">Han et al., 2012</xref>). While univariate filter techniques are popular because of their simplicity and scalability, they do not account for the complex interactions between SNPs (i.e., epistasis effects). Ignoring interactions amongst genetic features might explain a significant portion of the missing heritability of complex diseases (<xref ref-type="bibr" rid="B77">Maher, 2008</xref>; <xref ref-type="bibr" rid="B62">K&#xf6;nig et al., 2016</xref>). Furthermore, being population-based, GWAS do not provide a model for predicting individual genetic risk. Thus, translation of GWAS association to individualized risk prediction requires quantification of the predictive utility of the SNPs that are identified. Typically, genetic risk prediction models are built by: 1) Polygenic risk scoring; or 2) Machine learning (ML) (<xref ref-type="bibr" rid="B1">Abraham and Inouye, 2015</xref>).</p>
</sec>
<sec id="s1-2">
<title>1.2 Machine Learning for Individualized Complex Disease Risk Prediction</title>
<p>ML-based approaches are a potentially effective way of predicting individualized disease risk (<xref ref-type="fig" rid="F1">Figure 1</xref>). Unlike other popular predictive models (e.g., Polygenic Risk Scores, which use a fixed additive model), ML has the potential to account for complex interactions between features (i.e. SNP-SNP interaction) (<xref ref-type="bibr" rid="B53">Ho et al., 2019</xref>). ML algorithms utilize a set of advanced function-approximation algorithms (e.g., support-vector machine, random forests, K-nearest neighbor, artificial neural network, etc&#x2026;) to create a model that maps the association between a set of risk SNPs and a particular phenotype (<xref ref-type="bibr" rid="B65">Kruppa et al., 2012</xref>; <xref ref-type="bibr" rid="B82">Mohri et al., 2018</xref>; <xref ref-type="bibr" rid="B112">Uddin et al., 2019</xref>). Thus, a patient&#x2019;s genotype data can be used as an input to the predictive ML algorithm to predict their risk for developing a disease (<xref ref-type="fig" rid="F1">Figure 1B</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>
<bold>(A)</bold> Generalized workflow for creating a predictive ML model from a genotype dataset. <bold>(B)</bold> The final model can then be used for disease risk prediction.</p>
</caption>
<graphic xlink:href="fbinf-02-927312-g001.tif"/>
</fig>
<p>The prediction of disease risk using SNP genotype data can be considered as a binary classification problem within supervised learning. There is a generalized workflow for creating a predictive ML model from a case-control genotype dataset (<xref ref-type="fig" rid="F1">Figure 1A</xref>). The first step is data pre-processing, which includes quality control and feature selection (<xref ref-type="fig" rid="F1">Figure 1A</xref>, step 1). Quality control includes, but is not limited to, removing low-quality SNPs (e.g., those with low call rates or that deviate from the Hardy-Weinberg Equilibrium), and samples (e.g. individuals with missing genotypes). SNPs with low minor allele frequency (e.g., less than 0.01) can also be removed. Feature selection reduces the training dataset&#x2019;s dimensionality by choosing only features that are relevant to the phenotype. Feature selection is crucial in order to produce a model that generalizes well to unseen cohorts (see <xref ref-type="sec" rid="s1-3">Section 1.3</xref>). The goal of data pre-processing is to produce a high-quality dataset with which to train the prediction model.</p>
<p>The second step in a generalized predictive ML modelling workflow is the selection of the specific learning algorithm and setting the learning parameters (i.e. the &#x201c;hyperparameters&#x201d;) (<xref ref-type="fig" rid="F1">Figure 1A</xref>, step 2). Hyperparameters are algorithm-specific parameters whose values are set before training. Examples include the number of trees in a random forest, the type of kernel in an SVM, or the number of hidden layers in an artificial neural network. Different learning algorithms use different hyperparameters, and their values affect the complexity and learning behaviour of the model.</p>
<p>Once the hyperparameters have been set, the pre-processed dataset is used to train the chosen algorithm (<xref ref-type="fig" rid="F1">Figure 1A</xref>, step 3). This training step allows the algorithm to &#x201c;learn&#x201d; the association between the features (i.e., SNPs) and the class labels (i.e., phenotype status). Once learnt, the trained model&#x2019;s predictive performance (e.g. accuracy, precision, AUC) is validated (<xref ref-type="fig" rid="F1">Figure 1A</xref>, step 4). This is typically performed by K-fold cross-validation to estimate the model&#x2019;s performance on unseen data. Cross-validation on unseen data ensures that the trained model does not overfit the training data. During cross-validation, the training dataset is equally split into K parts, and each part will be used as a validation/testing set. For example, in 5-fold (K &#x3d; 5) cross-validation, the dataset is divided into 5 equal parts. The model is then trained on four of these parts and the performance is tested on the one remaining part. This process is repeated five times until all sections have been used as the testing set. The average performance of the model across all testing sets is then calculated.</p>
<p>The estimated model performance from cross-validation can be used as a guide for iterative refinement. During iterative refinement different aspects of the model building process (step 1&#x2013;4) are repeated and refined. For example, different: hyperparameters (hyperparameter tuning); learning algorithms, feature selection methods, or quality control thresholds can all be tried. The combination that produces the best average performance (in cross-validation) is chosen to build the final classification model. The process of selecting the best model development pipeline is known as model selection. The final classification model can then be tested against an independent (external) dataset to confirm the model&#x2019;s predictive performance, and finally be used for disease risk prediction (<xref ref-type="fig" rid="F1">Figure 1B</xref>).</p>
</sec>
<sec id="s1-3">
<title>1.3 Feature Selection to Reduce SNP Data Dimensionality</title>
<p>Overcoming the curse of dimensionality is one of the biggest challenges in building an accurate predictive ML model from high dimensional data (e.g. genotype or GWAS data). For example, a typical case-control genotype dataset used in a GWAS can contain up to a million SNPs and only a few thousands of samples (<xref ref-type="bibr" rid="B109">Szymczak et al., 2009</xref>). Using such data directly to train the ML classification algorithms is likely to generate an overfitted model, which performs well on the training data but poorly on unseen data. Overfitting happens when the model picks up the noise and random fluctuations in the training data as a learned concept. Furthermore, the excessive number of features increases the learning and computational time significantly because the irrelevant and redundant features clutter the learning algorithm (<xref ref-type="bibr" rid="B131">Yu and Liu, 2004</xref>).</p>
<p>Feature selection is a common way to minimize the problem of excessive and irrelevant features (<xref ref-type="fig" rid="F2">Figure 2</xref>). Generally, feature selection methods reduce the dimensionality of the training data by excluding SNPs that: 1) have low or negligible predictive power for the phenotype class; and 2) are redundant to each other (<xref ref-type="bibr" rid="B87">Okser et al., 2014</xref>). Effective feature selection can increase learning efficiency, predictive accuracy, and reduce the complexity of the learned results (<xref ref-type="bibr" rid="B61">Koller and Sahami, 1996</xref>; <xref ref-type="bibr" rid="B60">Kohavi and John, 1997</xref>; <xref ref-type="bibr" rid="B48">Hall, 2000</xref>). Furthermore, the SNPs that are incorporated into the predictive model (following feature selection) are typically assumed to be associated with loci that are mechanistically or functionally related to the underlying disease etiology (<xref ref-type="bibr" rid="B91">Pal and Foody, 2010</xref>; <xref ref-type="bibr" rid="B72">L&#xf3;pez et al., 2018</xref>). Therefore, extracting a subset of the most relevant features (through feature selection) could help researchers to understand the biological process(es) that underlie the disease (<xref ref-type="bibr" rid="B28">Cueto-L&#xf3;pez et al., 2019</xref>). In this context, feature selection can be said to be analogous to the identification of SNPs that are associated with phenotypes in GWAS.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Illustration of feature selection process. <bold>(A)</bold> The original dataset may contain an excessive number of features and a lot of irrelevant SNPs. <bold>(B)</bold> Feature selection reduces the dimensionality of the dataset by excluding irrelevant features and including only those features that are relevant for prediction. The reduced dataset contains relevant SNPs (rSNPs) which can be used to train the learning algorithm. N<sub>o</sub>: original number of features, N<sub>r</sub>: number of remaining relevant SNPs.</p>
</caption>
<graphic xlink:href="fbinf-02-927312-g002.tif"/>
</fig>
</sec>
<sec id="s1-4">
<title>1.4 The Problem of Feature Redundancy and Feature Interaction in SNP Genotype Dataset</title>
<p>GWAS typically identify multiple SNPs close to each other within a genetic window to be associated with a disease (<xref ref-type="bibr" rid="B20">Broekema et al., 2020</xref>). This occurs because of linkage disequilibrium (LD), which is the correlation between nearby variants such that they are inherited together within a population more often than by random chance (<xref ref-type="fig" rid="F3">Figure 3</xref>). In ML and prediction contexts, these highly correlated SNPs can be considered redundant because they carry similar information and can substitute for each other. The inclusion of redundant features has been shown to degrade ML performance and increase computation time (<xref ref-type="bibr" rid="B66">Kubus, 2019</xref>; <xref ref-type="bibr" rid="B29">Danasingh et al., 2020</xref>). Therefore, ideally, feature selection techniques should select one SNP (e.g., the SNP with the highest association score) to represent the entire LD cluster as a feature for prediction. However, since the SNP with the highest association signal is not necessarily the causal variant of that locus (<xref ref-type="bibr" rid="B89">Onengut-Gumuscu et al., 2015</xref>), geneticists often link an association signal to the locus they belong to rather than the SNP itself (<xref ref-type="bibr" rid="B21">Brzyski et al., 2017</xref>). If a researcher aims to identify the true causal variant within an association locus then fine-mapping techniques must be employed (see (<xref ref-type="bibr" rid="B107">Spain and Barrett, 2015</xref>; <xref ref-type="bibr" rid="B20">Broekema et al., 2020</xref>))</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Lead SNPs in GWAS studies need not be the causal variant due to linkage disequilibrium. Illustration of GWAS result where SNPs (circles) are colored according to linkage disequilibrium (LD) strength with the true causal variant within the locus (indicated with a black star). Due to LD, several SNPs near the true causal variant may show a statistically significant association with the phenotype. In ML, these highly correlated SNPs can be considered redundant to each other, therefore only one representative SNP for this LD cluster is required as a selected feature. In this example, the causal variant is not the variant with the strongest GWAS association signal.</p>
</caption>
<graphic xlink:href="fbinf-02-927312-g003.tif"/>
</fig>
<p>Relevant features may appear irrelevant (or weakly relevant) on their own but are highly correlated to the class in the presence of other features. This situation arises because these features are only relevant to the phenotype when they interact with other features (i.e., they are epistatic). <xref ref-type="fig" rid="F4">Figure 4</xref> shows a simplified example of a feature interaction that arises because of epistasis. In this example, there is an equal number of SNP 1 &#x3d; AA, Aa, or aa in cases and controls, which means that SNP 1 does not affect the distribution of the phenotype class. The same is true for SNP 2. However, the allele combinations between SNP1 and SNP2 do affect phenotype distribution. For example, there are more combinations of SNP1 &#x3d; AA and SNP2 &#x3d; AA in cases than controls, consistent with this allele combination conferring increased risk (<xref ref-type="fig" rid="F4">Figure 4B</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>The functional impacts of SNPs can interact and may be epistatic. <bold>(A)</bold> Individually, neither SNP1 nor SNP2 affect phenotype distribution. <bold>(B)</bold> Taken together, allele combinations between SNP1 and SNP2 can affect phenotype distribution (marked with yellow star).</p>
</caption>
<graphic xlink:href="fbinf-02-927312-g004.tif"/>
</fig>
<p>It is generally advisable to consider both feature redundancy and feature interaction during feature selection. This is especially true when dealing with genotype data, where linkage disequilibrium (LD) and the non-random association of alleles create redundant SNPs within loci. Moreover, complex epistatic interactions between SNPs can account for some of the missing heritability of complex diseases and should be considered when undertaking feature selection. Indeed, studies have demonstrated the benefits to predictive power of ML approaches that consider feature interactions when compared to those that only consider simple additive risk contributions (<xref ref-type="bibr" rid="B27">Couronn&#xe9; et al., 2018</xref>; <xref ref-type="bibr" rid="B90">Ooka et al., 2021</xref>). However, searching for relevant feature interactions undoubtedly comes with additional computational costs. As such, deciding whether different aspects of it must be done (i.e., searching for relevant interactions) is a problem-specific question that depends upon the nature of the input data and the <italic>a priori</italic> assumptions of the underlying mechanisms of the disease. For example, if the genetic data originates from whole-genome sequencing (WGS), or a genotyping array, and the target phenotype is a complex disease (i.e. best explained by non-linear interactions between loci) then using a feature selection approach that considers interactions will be beneficial. By contrast, if the input genetic data does not uniformly cover the genome (i.e., the density of the SNPs is much higher in known disease associated loci; e.g. Immunochip genotyping array) then interactions may not aid the selection as the lack of data leads to potentially important interactions with SNPs outside known disease associated loci being missed. Furthermore, not all diseases are recognized as involving complex epistatic effects. 
In such cases, searching for feature interactions might lead to additional computation complexity without obvious predictive benefits. For example, Romagnoni et al. (<xref ref-type="bibr" rid="B98">Romagnoni et al., 2019</xref>) reported that searching for possible epistatic interactions did not yield a significant increase in predictive accuracy for Crohn&#x2019;s disease. Notably, the authors concluded that epistatic effects might make limited contributions to the genetic architecture of Crohn&#x2019;s disease, and the use of the Immunochip genotyping array might have caused interaction effects with SNPs outside of the known autoimmune risk loci to have been missed.</p>
<p>The goal of feature selection is to select a minimum subset of features (which includes individually relevant and interacting features) that can be used to explain the different classes with as little information loss as possible (<xref ref-type="bibr" rid="B131">Yu and Liu, 2004</xref>). It is possible that there are multiple possible minimum feature subsets due to redundancies. Thus, this is &#x201c;a minimum subset&#x201d; and not &#x201c;the minimum set.&#x201d;</p>
<p>In the remainder of this manuscript we discuss the advantages and disadvantages of representative filter, wrapper, and embedded methods of feature selection (<xref ref-type="sec" rid="s2">Section 2</xref>). We then assess expansions of these feature selection methods (e.g. hybrid, ensemble, and integrative methods; <xref ref-type="sec" rid="s3-1">Sections 3.1</xref>&#x2013;<xref ref-type="sec" rid="s3-2">3.2</xref>) and exhaustive search methods for higher-order (&#x2265;3) SNP-SNP interaction/epistasis effects (<xref ref-type="sec" rid="s4">Section 4</xref>).</p>
</sec>
</sec>
<sec id="s2">
<title>2 Feature Selection Techniques</title>
<p>The feature selection methods that are routinely used in classification can be split into three methodological categories (<xref ref-type="bibr" rid="B47">Guyon et al., 2008</xref>; <xref ref-type="bibr" rid="B14">Bol&#xf3;n-Canedo et al., 2013</xref>): 1) filters; 2) wrappers; and 3) embedded methods (<xref ref-type="table" rid="T1">Table 1</xref>). These methods differ in terms of 1) the feature selection aspect being separate or integrated as a part of the learning algorithm; 2) evaluation metrics; 3) computational complexities; 4) the potential to detect redundancies and interactions between features. The particular strengths and weaknesses of each methodological category mean they are more suitable for particular use cases (<xref ref-type="bibr" rid="B99">Saeys et al., 2007</xref>; <xref ref-type="bibr" rid="B88">Okser et al., 2013</xref>; <xref ref-type="bibr" rid="B31">De et al., 2014</xref>; <xref ref-type="bibr" rid="B96">Remeseiro and Bolon-Canedo, 2019</xref>) (<xref ref-type="table" rid="T1">Table 1</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Strengths, weaknesses, and examples of the three main feature selection categories.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Feature Selection Method</th>
<th align="center">Strengths</th>
<th align="center">Weaknesses</th>
<th align="center">Examples</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="6" align="left">Filter&#x2014;Univariate</td>
<td align="left">- Fast and scalable</td>
<td align="left">- Feature dependencies not modeled</td>
<td align="left">- &#x3c7;<sup>2</sup>/chi-squared test</td>
</tr>
<tr>
<td align="left">- Independent of classifier</td>
<td align="left">- Interaction with classifier not modeled</td>
<td align="left">- Fisher&#x2019;s exact test</td>
</tr>
<tr>
<td align="left">- Reduce risk of overfitting</td>
<td align="left"/>
<td align="left">- Pearson correlation</td>
</tr>
<tr>
<td align="left"/>
<td align="left"/>
<td align="left">- Information gain</td>
</tr>
<tr>
<td align="left"/>
<td align="left"/>
<td align="left">- <italic>t</italic>-test</td>
</tr>
<tr>
<td align="left"/>
<td align="left"/>
<td align="left">- Mann-Whitney U test</td>
</tr>
<tr>
<td rowspan="3" align="left">Filter&#x2014;Multivariate</td>
<td align="left">- Can model feature dependencies</td>
<td align="left">- Slower and not as scalable as univariate filters</td>
<td align="left">- Fast correlation-based filter (FCBF) (<xref ref-type="bibr" rid="B131">Yu and Liu, 2004</xref>)</td>
</tr>
<tr>
<td align="left">- Independent of the classifier</td>
<td align="left">- Interaction with classifier not modeled</td>
<td align="left">- Minimal-redundancy-maximal-relevance (mRMR) (<xref ref-type="bibr" rid="B94">Peng et al., 2005</xref>)</td>
</tr>
<tr>
<td align="left">- Less risk of overfitting</td>
<td align="left"/>
<td align="left">- Relief-based algorithms (<xref ref-type="bibr" rid="B58">Kira and Rendell, 1992</xref>; <xref ref-type="bibr" rid="B63">Kononenko, 1994</xref>; <xref ref-type="bibr" rid="B83">Moore and White, 2007</xref>; <xref ref-type="bibr" rid="B41">Greene et al., 2009</xref>; <xref ref-type="bibr" rid="B42">Greene et al., 2010</xref>; <xref ref-type="bibr" rid="B40">Granizo-Mackenzie and Moore, 2013</xref>; <xref ref-type="bibr" rid="B114">Urbanowicz et al., 2018a</xref>)</td>
</tr>
<tr>
<td rowspan="4" align="left">Wrapper</td>
<td align="left">- Model feature dependencies</td>
<td align="left">- Slower than filter and embedded methods</td>
<td align="left">- Sequential forward and backward selection (<xref ref-type="bibr" rid="B59">Kittler, 1978</xref>)</td>
</tr>
<tr>
<td align="left">- Better performance than filter method</td>
<td align="left">- More prone to overfitting</td>
<td align="left">- Randomized hill climbing (<xref ref-type="bibr" rid="B106">Skalak, 1994</xref>)</td>
</tr>
<tr>
<td align="left">- Model interaction with classifier</td>
<td align="left">- Selected features are classifier dependent</td>
<td align="left">- Genetic algorithm (<xref ref-type="bibr" rid="B50">Hayes-Roth, 1975</xref>)</td>
</tr>
<tr>
<td align="left"/>
<td align="left"/>
<td align="left">- Recursive feature elimination</td>
</tr>
<tr>
<td rowspan="3" align="left">Embedded</td>
<td align="left">- Model feature dependencies</td>
<td align="left">- Slower than filter methods</td>
<td align="left">- Random forest (<xref ref-type="bibr" rid="B19">Breiman, 2001</xref>)</td>
</tr>
<tr>
<td align="left">- Faster than wrapper method</td>
<td align="left">- Selected features are classifier dependent</td>
<td align="left">- Lasso (L1) or elastic net regression</td>
</tr>
<tr>
<td align="left">- Model interaction with classifier</td>
<td align="left"/>
<td align="left"/>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s2-1">
<title>2.1 Filter Methods for Feature Selection</title>
<p>Filter methods use feature ranking as the evaluation metric for feature selection. Generally, features are ranked based on their scores in various statistical tests for their correlation with the class. Features that score below a certain threshold are removed, while features that score above it are selected. Once a subset of features is selected, it can then be presented as an input to the chosen classifier algorithm. Unlike the other feature selection methods (wrapper and embedded), filter methods are independent/separate from the classifier algorithm (<xref ref-type="fig" rid="F5">Figure 5A</xref>). This separation means that filter methods are free from classifier&#x2019;s bias which reduces overfitting. However, this independence also means that interaction with the classifier is not considered during feature selection (<xref ref-type="bibr" rid="B56">John et al., 1994</xref>). Thus, the selected feature set is more general and not fine-tuned to any specific classifier (<xref ref-type="bibr" rid="B133">Zhang et al., 2013</xref>). This lack of tuning means that filter methods tend to produce models that have reduced predictive performance compared to those produced by wrapper or embedded methods. The main advantage of filter methods over other feature selection methods is that they are generally less computationally demanding, and thus can easily be scaled to very high dimensional data (e.g. SNP genotype datasets).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Generalized illustrations of methods. <bold>(A)</bold> Schematic of filter method, where feature selection is independent of the classifier. <bold>(B)</bold>The wrapper method. Feature selection relies on the performance of the classifier algorithm on the various generated feature subsets. <bold>(C)</bold> The embedded method. In embedded methods, feature selection is integrated as a part of the classifier algorithm. <bold>(D)</bold> Hybrid methods. In hybrid methods, features are reduced through the application of a filter method before the reduced feature set is passed through a wrapper or embedded method to obtain the final feature subset. <bold>(E)</bold> Integrative methods. In integrative methods, external information is used as a filter to reduce feature search space before the reduced feature set is passed through a wrapper or embedded method to obtain the final feature subset.</p>
</caption>
<graphic xlink:href="fbinf-02-927312-g005.tif"/>
</fig>
<p>Existing filter methods can be broadly categorized as either univariate or multivariate. Univariate methods test each feature individually, while multivariate methods consider a subset of features simultaneously. Due to their speed and simplicity, univariate methods (e.g., &#x3c7;2 test, Fisher&#x2019;s exact test, information gain, Euclidean distance, Pearson correlation, Mann-Whitney U test, <italic>t</italic>-test, etc...) have attracted the most attention in fields that work with high dimensional datasets (<xref ref-type="bibr" rid="B99">Saeys et al., 2007</xref>; <xref ref-type="bibr" rid="B16">Bol&#xf3;n-Canedo et al., 2014</xref>). However, since each feature is considered separately, univariate methods only focus on feature relevance and cannot detect feature redundancy or interactions. This decreases model predictive performance because: 1) the inclusion of redundant features makes the feature subset larger than necessary; and 2) ignoring feature interactions can lead to the loss of important information.</p>
<p>More advanced multivariate filter techniques, including mutual information feature selection (MIFS) (<xref ref-type="bibr" rid="B9">Battiti, 1994</xref>), minimal-redundancy-maximal-relevance (mRMR) (<xref ref-type="bibr" rid="B94">Peng et al., 2005</xref>), conditional mutual information maximization (CMIM) (<xref ref-type="bibr" rid="B101">Schlittgen, 2011</xref>), and fast correlation-based filter (FCBF) (<xref ref-type="bibr" rid="B131">Yu and Liu, 2004</xref>), have been developed to detect relevant features and eliminate redundancies between features without information loss. Other algorithms like BOOST (<xref ref-type="bibr" rid="B118">Wan et al., 2010</xref>), FastEpistasis (<xref ref-type="bibr" rid="B102">Sch&#xfc;pbach et al., 2010</xref>), and TEAM (<xref ref-type="bibr" rid="B132">Zhang et al., 2010</xref>) have been designed to exhaustively search for all possible feature interactions. However, they are restricted to two-way (pairwise) interactions and they cannot eliminate redundancy. More recent algorithms (e.g., the feature selection based on relevance, redundancy and complementarity [FS-RRC] (<xref ref-type="bibr" rid="B68">Li et al., 2020</xref>), Conditional Mutual Information-based Feature Selection considering Interaction [CMIFSI] (<xref ref-type="bibr" rid="B70">Liang et al., 2019</xref>)) have been demonstrated to be able to detect feature interactions and eliminate redundancies. However, again, they are mostly constrained to pair-wise feature interactions. 
Another popular family of filter algorithms is the Relief-based algorithm (RBA) family (e.g., Relief (<xref ref-type="bibr" rid="B58">Kira and Rendell, 1992</xref>), ReliefF (<xref ref-type="bibr" rid="B63">Kononenko, 1994</xref>), TURF (<xref ref-type="bibr" rid="B83">Moore and White, 2007</xref>), SURF (<xref ref-type="bibr" rid="B41">Greene et al., 2009</xref>), SURF&#x2a; (<xref ref-type="bibr" rid="B42">Greene et al., 2010</xref>), MultiSURF (<xref ref-type="bibr" rid="B114">Urbanowicz et al., 2018a</xref>), MultiSURF&#x2a; (<xref ref-type="bibr" rid="B40">Granizo-Mackenzie and Moore, 2013</xref>), etc&#x2026;). Relief does not exhaustively search for feature interactions. Instead, it scores the importance of a feature according to how well the feature&#x2019;s value distinguishes samples that are similar to each other (e.g., similar genotype) but belong to different classes (e.g., case and control). Notably, RBAs can detect pair-wise feature interactions, some RBAs (e.g., ReliefF, MultiSURF) can even detect higher order (&#x3e;2 way) interactions (<xref ref-type="bibr" rid="B114">Urbanowicz et al., 2018a</xref>). However, RBAs cannot eliminate redundant features. Different RBAs have been reviewed and compared previously (<xref ref-type="bibr" rid="B114">Urbanowicz et al., 2018a</xref>; <xref ref-type="bibr" rid="B113">Urbanowicz et al., 2018b</xref>).</p>
<p>Despite its advantages, it should be noted that multivariate methods are more computationally heavy than univariate methods and thus cannot as effectively be scaled to very high dimensional data. Furthermore, multivariate filters still suffer from some of the same limitations as univariate filters due to their independence from the classifier algorithm (i.e., it ignores interaction with the classifier). In this context, wrapper and embedded methods represent an alternative way to perform multivariate feature selection while allowing for interactions with the classifier although again there is a computational cost (see <xref ref-type="sec" rid="s2-2">Sections 2.2</xref>, <xref ref-type="sec" rid="s2-3">2.3</xref>).</p>
<sec id="s2-1-1">
<title>2.1.1 The Multiple Comparison Correction Problem and Choosing the Appropriate Filter Threshold</title>
<p>Filter methods often return a ranked list of features rather than an explicit best subset of features (as occurs in wrapper methods). For example, univariate statistical approaches like &#x3c7;2 test and Fisher&#x2019;s exact test rank features based on <italic>p</italic> value. Due to the large number of hypothesis tests made, relying on the usual statistical significance threshold of <italic>p</italic> &#x3c; 0.05 will result in a preponderance of type 1 errors (false positives). As an illustration, if we perform hypothesis tests on 1 million SNPs at a <italic>p</italic> value threshold &#x3c;0.05, we can expect around 50,000 false positives, which is a considerable number. Therefore, choosing an appropriate threshold for relevant features adds a layer of complexity to predictive modelling when using feature selection methods that return ranked feature lists.</p>
<p>For methods that return a <italic>p</italic> value, the <italic>p</italic> value threshold is commonly adjusted by controlling for FWER (family-wise error rate) or FDR (false discovery rate). FWER is the probability of making at least one type 1 error across all tests performed (i.e., 5% FWER means there is 5% chance of making at least one type 1 error across all hypothesis tests). FWER can be controlled below a certain threshold (most commonly &#x3c;5%) by applying a Bonferroni correction (<xref ref-type="bibr" rid="B34">Dunn, 1961</xref>). The Bonferroni correction works by dividing the desired probability of type 1 error <italic>p</italic> (e.g., <italic>p</italic> &#x3c; 0.05) by the total number of independent hypotheses tested. This is a relatively conservative test that assumes that all the hypotheses being tested are independent of each other. However, this assumption is likely to be violated in genetic analyses where SNPs that are close to each other in the linear DNA sequence tend to be highly correlated due to LD (<xref ref-type="fig" rid="F3">Figure 3</xref>). Thus, the effective number of independent hypothesis tests is likely to be smaller than the number of SNPs examined. Not taking LD into account will lead to overcorrection for the number of tests performed. For example, the most commonly accepted <italic>p</italic> value threshold used in GWAS (<italic>p</italic> &#x3c; 5 &#xd7; 10<sup>&#x2212;8</sup>) is based on a Bonferroni correction on all independent common SNPs after taking account of the LD structure of the genome (<xref ref-type="bibr" rid="B33">Dudbridge and Gusnanto, 2008</xref>; <xref ref-type="bibr" rid="B127">Xu et al., 2014</xref>). Despite its widespread use in GWAS, this threshold has been criticized for being too conservative, leading to excessive false negatives (<xref ref-type="bibr" rid="B92">Panagiotou and Ioannidis, 2012</xref>). Panagiotou et al. 
(<xref ref-type="bibr" rid="B92">Panagiotou and Ioannidis, 2012</xref>) noted that a considerable number of legitimate and replicable associations can have <italic>p</italic> values just above this threshold; therefore, a possible relaxation of this commonly accepted threshold has been suggested.</p>
<p>Alternatively, one can apply <italic>p</italic> value adjustment to control for FDR instead of FWER. Controlling for FDR is a less stringent metric than controlling for FWER because it is the allowed proportion of false positives among all positive findings (i.e., 5% FDR means that approximately 5% of all positive findings are false). Despite potentially including more false positives in the selected features, FDR has been shown to be more attractive if prediction (rather than inference) is the end goal (<xref ref-type="bibr" rid="B2">Abramovich et al., 2006</xref>).</p>
<p>FDR can be controlled by applying the Benjamini-Hochberg (B-H) procedure (<xref ref-type="bibr" rid="B10">Benjamini and Hochberg, 1995</xref>). However, like the Bonferroni correction, the B-H procedure assumes independent hypothesis tests. To satisfy this assumption, for example, <xref ref-type="bibr" rid="B21">Brzyski et al. (2017)</xref> proposed a strategy that clusters tested SNPs based on LD before applying B&#x2013;H. Alternatively, there also exist procedures that control FDR without making any assumptions such as the Benjamini-Yekutieli (B-Y) procedure (<xref ref-type="bibr" rid="B11">Benjamini and Yekutieli, 2001</xref>). However, the B-Y procedure is more stringent, leading to less power compared to procedures that assume independence like B-H (<xref ref-type="bibr" rid="B35">Farcomeni, 2008</xref>).</p>
<p>The question remains, when applying a Bonferroni, B-H or B-Y correction, which FWER/FDR threshold is optimum (e.g., 5, 7, or 10%)? In a ML context, this threshold can be viewed as a hyperparameter. Thus, the optimum threshold that produces the best performance can be approximated by cross-validation as a part of the model selection process (<xref ref-type="fig" rid="F1">Figure 1A</xref>, step 5). The threshold for feature selection methods that do not directly produce a <italic>p</italic> value (e.g., multivariate algorithms like mRMR (<xref ref-type="bibr" rid="B94">Peng et al., 2005</xref>)) can also be chosen using cross validation (e.g. by taking the top <italic>n</italic> SNPs as the selected features).</p>
</sec>
</sec>
<sec id="s2-2">
<title>2.2 Wrapper Methods for Feature Selection</title>
<p>In contrast to filter methods, wrapper methods use the performance of the chosen classifier algorithm as a metric to aid the selection of the best feature subset (<xref ref-type="fig" rid="F5">Figure 5B</xref>). Thus, wrapper methods identify the best-performing set of features for the chosen classifier algorithm (<xref ref-type="bibr" rid="B46">Guyon and Elisseeff, 2003</xref>; <xref ref-type="bibr" rid="B96">Remeseiro and Bolon-Canedo, 2019</xref>). This is the main advantage of wrapper methods, and has been shown to result in higher predictive performance than can be obtained with filter methods (<xref ref-type="bibr" rid="B55">Inza et al., 2004</xref>; <xref ref-type="bibr" rid="B117">Wah et al., 2018</xref>; <xref ref-type="bibr" rid="B38">Ghosh et al., 2020</xref>). However, exhaustive searches of the total possible feature combination space are computationally infeasible (<xref ref-type="bibr" rid="B12">Bins and Draper, 2001</xref>). Therefore, heuristic search strategies across the space of possible feature subsets must be defined (e.g., randomized (<xref ref-type="bibr" rid="B80">Mao and Yang, 2019</xref>), sequential search (<xref ref-type="bibr" rid="B126">Xiong et al., 2001</xref>), genetic algorithm (<xref ref-type="bibr" rid="B129">Yang and Honavar, 1998</xref>; <xref ref-type="bibr" rid="B69">Li et al., 2004</xref>), ant colony optimization (<xref ref-type="bibr" rid="B37">Forsati et al., 2014</xref>), etc&#x2026;) to generate a subset of features. A specific classification algorithm is then trained and evaluated using the generated feature subsets. The classification performances of the generated subsets are compared, and the subset that results in the best performance [typically estimated using AUC (area under the receiver operating characteristic curve)] is chosen as the optimum subset. Practically, any search strategy and classifier algorithm can be combined to produce a wrapper method.</p>
<p>Wrapper methods implicitly take into consideration feature dependencies, including interactions and redundancies, during the selection of the best subset. However, due to the high number of computations required to generate the feature subsets and evaluate them, wrapper methods are computationally heavy (relative to filter and embedded methods) (<xref ref-type="bibr" rid="B24">Chandrashekar and Sahin, 2014</xref>). As such, applying wrapper methods to SNP genotype data is usually not favored, due to the very high dimensionality of SNP data sets (<xref ref-type="bibr" rid="B64">Kotzyba-Hibert et al., 1995</xref>; <xref ref-type="bibr" rid="B16">Bol&#xf3;n-Canedo et al., 2014</xref>).</p>
<p>Wrapper methods are dependent on the classifier used. Therefore, there is no guarantee that the selected features will remain optimum if another classifier is used. In some cases, using classifier performance as a guide for feature selection might produce a feature subset with good accuracy within the training dataset, but poor generalizability to external datasets (i.e., more prone to overfitting) (<xref ref-type="bibr" rid="B60">Kohavi and John, 1997</xref>).</p>
<p>Unlike filter methods which produce a ranked list of features, wrapper methods produce a &#x201c;best&#x201d; feature subset as the output. This has both advantages and disadvantages. One advantage of this is that the user does not need to determine the most optimum threshold or number of features selected (because the output is already a feature subset). The disadvantage is that it is not immediately obvious which features are relatively more important within the set. Overall, this means that although wrapper methods can produce better classification performance, they are less useful in exposing the relationship between the features and the class.</p>
</sec>
<sec id="s2-3">
<title>2.3 Embedded Methods for Feature Selection</title>
<p>In an embedded method, feature selection is integrated or built into the classifier algorithm. During the training step, the classifier adjusts its internal parameters and determines the appropriate weights/importance given to each feature to produce the best classification accuracy. Therefore, the search for the optimum feature subset and model construction in an embedded method is combined in a single step (<xref ref-type="bibr" rid="B46">Guyon and Elisseeff, 2003</xref>) (<xref ref-type="fig" rid="F5">Figure 5C</xref>). Some examples of embedded methods include decision tree-based algorithms (e.g., decision tree, random forest, gradient boosting), and feature selection using regularization models (e.g., LASSO or elastic net). Regularization methods usually work with linear classifiers (e.g., SVM, logistic regression) by penalizing/shrinking the coefficient of features that do not contribute to the model in a meaningful way (<xref ref-type="bibr" rid="B88">Okser et al., 2013</xref>). It should be noted that like many filter methods, decision tree-based and regularization methods mentioned above also return a ranked list of features. Decision tree-based algorithms rank feature importance based on metrics like the Mean Decrease Impurity (MDI) (<xref ref-type="bibr" rid="B74">Louppe et al., 2013</xref>). For regularization methods, the ranking of features is provided by the magnitude of the feature coefficients.</p>
<p>Embedded methods are an intermediate solution between filter and wrapper methods in the sense that the embedded methods combine the qualities of both methods (<xref ref-type="bibr" rid="B45">Guo et al., 2019</xref>). Specifically, like filter methods, embedded methods are computationally lighter than wrapper methods (albeit still more demanding than filter methods). This reduced computational load occurs even though the embedded method allows for interactions with the classifier (i.e., it incorporates classifier&#x2019;s bias into feature selection, which tends to produce better classifier performance) as is done for wrapper methods.</p>
<p>Some embedded methods (e.g., random forest and other decision tree-based algorithms) do allow for feature interactions. Notably, unlike most multivariate filters, tree-based approaches can consider higher-order interactions (i.e., more than two). Historically, random forest is rarely applied directly to whole-genome datasets due to computational and memory constraints (<xref ref-type="bibr" rid="B109">Szymczak et al., 2009</xref>; <xref ref-type="bibr" rid="B103">Schwarz et al., 2010</xref>). For example, it has been shown that the original Random Forest algorithm (developed by Breiman and Cutler, 2004) can be applied to analyze no more than 10,000 SNPs (<xref ref-type="bibr" rid="B103">Schwarz et al., 2010</xref>). Indeed, many applications of random forest have been focused on low-dimensional datasets. For example, Bureau et al. (<xref ref-type="bibr" rid="B22">Bureau et al., 2005</xref>), identified relevant SNPs from a dataset of just 42 SNPs. Lopez et al. (<xref ref-type="bibr" rid="B72">L&#xf3;pez et al., 2018</xref>) implemented a random forest algorithm to identify relevant SNPs from a dataset that contains a total of 101 SNPs that have been previously associated with type 2 diabetes.</p>
<p>Nevertheless, recent advances in computational power, together with optimizations and modifications of the random forest algorithm (e.g., the Random Jungle (<xref ref-type="bibr" rid="B103">Schwarz et al., 2010</xref>)) have resulted in efficiency gains that enable it to be applied to whole-genome datasets. However, studies have indicated that the effectiveness of random forest to detect feature interactions declines as the number of features increases, thus limiting the useful application of random forest approaches to highly dimensional datasets (<xref ref-type="bibr" rid="B75">Lunetta et al., 2004</xref>; <xref ref-type="bibr" rid="B122">Winham et al., 2012</xref>). Furthermore, the ability of standard random forest to detect feature interactions is somewhat dependent on strong individual effects, potentially losing epistatic SNPs with a weak individual effect. Several modified random forest algorithms have been developed to better account for epistatic interactions between SNPs with weak individual effect (e.g., T-tree (<xref ref-type="bibr" rid="B18">Botta et al., 2014</xref>), GWGGI (<xref ref-type="bibr" rid="B120">Wei and Lu, 2014</xref>)). These modified algorithms are still less sensitive than exhaustive search methods (<xref ref-type="sec" rid="s4">Section 4</xref>).</p>
<p>Unlike some multivariate filters (<xref ref-type="sec" rid="s2-1">Section 2.1</xref>), random forest does not automatically eliminate redundant features. Indeed, Mariusz Kubus (<xref ref-type="bibr" rid="B66">Kubus, 2019</xref>) showed that the presence of redundant features decreases the performance of the random forest algorithm. A potential solution to this problem includes filtering out the redundant features before applying random forest [see hybrid method (<xref ref-type="sec" rid="s3-1">Section 3.1</xref>)]. Another possible solution might be aggregating the information carried by these redundant features (e.g., using haplotypes instead of SNPs to build the model). Some software packages like T-tree (<xref ref-type="bibr" rid="B18">Botta et al., 2014</xref>) have a built-in capability to account for redundancy by transforming the input SNPs into groups of SNPs in high-LD with each other.</p>
<p>In contrast to decision tree-based algorithms, penalized methods (e.g., LASSO) can discard redundant features, but they have no built-in ability to detect feature interactions (<xref ref-type="bibr" rid="B8">Barrera-G&#xf3;mez et al., 2017</xref>). Instead, interaction terms must be explicitly included in the analysis (<xref ref-type="bibr" rid="B105">Signorino and Kirchner, 2018</xref>). This is commonly achieved by exhaustively including all (usually pairwise) interaction terms for the features. While this approach can be effective for data with low dimensionality, it can be inaccurate and computationally prohibitive in highly dimensional data settings. Two-stage or hybrid strategies that result in reduced search spaces are potential solutions to this problem (<xref ref-type="sec" rid="s3-1">Section 3.1</xref>).</p>
</sec>
<sec id="s2-4">
<title>2.4 Which Feature Selection Method Is Optimal?</title>
<p>The &#x201c;no free lunch&#x201d; theorem states that in searching for a solution, no single algorithm can be specialized to be optimal for all problem settings (<xref ref-type="bibr" rid="B123">Wolpert and Macready, 1997</xref>). This is true for feature selection methods, each of which has its own strengths and weaknesses (<xref ref-type="table" rid="T1">Table 1</xref>), relying on different metrics and underlying assumptions. Several studies have compared the predictive performance of the different feature selection methods (<xref ref-type="bibr" rid="B36">Forman, 2003</xref>; <xref ref-type="bibr" rid="B14">Bol&#xf3;n-Canedo et al., 2013</xref>; <xref ref-type="bibr" rid="B6">Aphinyanaphongs et al., 2014</xref>; <xref ref-type="bibr" rid="B117">Wah et al., 2018</xref>; <xref ref-type="bibr" rid="B17">Bommert et al., 2020</xref>). These comparative studies have resulted in the widely held opinion that there is no such thing as the &#x201c;best method&#x201d; that is fit for all problem settings.</p>
<p>Which feature selection method is best is a problem-specific question that depends on the dataset being analyzed and the specific goals that the researcher aims to accomplish. For example, suppose the aim is to identify which features are relatively the most important (which can be useful to help uncover the biological mechanism behind the disease). In that case, filter methods are better because they produce a ranked list of features and are the most computationally efficient. If the dataset contains a relatively low number of features (e.g., tens to hundreds), applying wrapper methods likely results in the best predictive performance. Indeed, in this case, model selection algorithms can be applied to identify which wrapper algorithm is the best. By contrast, for the typical SNP genotype dataset with up to a million features, computational limitations mean that directly applying wrapper or embedded methods might not be computationally practical even though they model feature dependencies and tend to produce better classifier accuracy than filter methods.</p>
<p>New feature selection strategies are emerging that either: 1), use a two-step strategy with a combination of different feature selection methods (hybrid methods); or 2), combine the output of multiple feature selection methods (ensemble methods). These strategies take advantage of the strengths of the different feature selection methods that they include.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Hybrid Methods&#x2014;Combining Different Feature Selection Approaches</title>
<p>Hybrid methods combine different feature selection methods in a multi-step process to take advantage of the strengths of the component methods (<xref ref-type="fig" rid="F5">Figure 5D</xref>). For example, univariate filter-wrapper hybrid methods incorporate a univariate filter method as the first step to reduce the initial feature set size, thus limiting the search space and computational load for the subsequent wrapper step. In this instance, the filter method is used because of its simplicity and speed. By contrast, the wrapper method is used because it can model feature dependencies and allow interactions with the classifier, thus producing better performance. Typically, a relaxed scoring threshold is used for the filtering step because the main goal is to prioritize a subset of SNPs for further selection by the wrapper method. For example, when using the univariate &#x3c7;2 test in the initial feature selection step, instead of the genome-wide significance threshold commonly used in GWAS (<italic>p</italic> &#x3c; 5 &#xd7; 10<sup>&#x2013;8</sup>), one might choose a less stringent threshold (e.g., <italic>p</italic> &#x3c; 5 &#xd7; 10<sup>&#x2013;4</sup>), or adjust by FDR instead. While this might result in more false positives, these can be further eliminated in the subsequent step, and SNPs with weak individual effects but strong interacting effects will be able to survive the filtering step and thus can be detected by the wrapper method. Practically, any filter, wrapper, or embedded method can be combined to create a hybrid method.</p>
<p>In a hybrid method, implementing the filter step reduces the feature search space thus allowing for the subsequent use of computationally expensive wrapper or embedded methods for high-dimensional datasets (which might otherwise be computationally unfeasible). For example, Yoshida and Koike (<xref ref-type="bibr" rid="B130">Yoshida and Koike, 2011</xref>) presented a novel embedded method to detect interacting SNPs associated with rheumatoid arthritis called SNPInterForest (a modification of random forest algorithm). To accommodate the computational load of the proposed algorithm, the authors first narrowed the feature size from 500,000 SNPs to 10,000 SNPs using a univariate filter before further selection using SNPInterForest.</p>
<p>
<xref ref-type="bibr" rid="B121">Wei et al. (2013)</xref> built a Crohn&#x2019;s disease prediction model that employed a single SNP association test (a univariate filter method), followed by logistic regression with L1 (LASSO) regularization (an embedded method). The first filtering step reduced the original feature size from 178,822 SNPs to 10,000 SNPs for further selection with LASSO. The final predictive model achieved a respectable AUC of 0.86 in the testing set.</p>
<p>There is always a trade-off between computational complexity and performance in feature selection. In this context, hybrid methods can be considered a &#x201c;middle ground&#x201d; solution between the simple filter method and the more computationally complex but performant wrapper and embedded methods. Indeed, many examples in the literature have shown that a hybrid method tends to produce better performance than a simple filter while also being less computationally expensive than a purely wrapper method. For example, <xref ref-type="bibr" rid="B5">Alzubi et al. (2017)</xref> proposed a feature selection strategy using a hybrid of the CMIM filter and RFE-SVM wrapper method to classify healthy and diseased patients. They used SNP datasets for five conditions (thyroid cancer, autism, colorectal cancer, intellectual disability, and breast cancer). The authors showed that generally, the SNPs selected by the hybrid CMIM &#x2b; RFE-SVM produce better classification performance than using any single filter method like mRMR (<xref ref-type="bibr" rid="B94">Peng et al., 2005</xref>), CMIM (<xref ref-type="bibr" rid="B101">Schlittgen, 2011</xref>), FCBF (<xref ref-type="bibr" rid="B131">Yu and Liu, 2004</xref>), and ReliefF (<xref ref-type="bibr" rid="B113">Urbanowicz et al., 2018b</xref>), thus showing the superiority of the hybrid method.</p>
<p>
<xref ref-type="bibr" rid="B38">Ghosh et al. (2020)</xref> demonstrated that a hybrid filter-wrapper feature selection technique, based on ant colony optimization, performs better than those based solely on filter techniques. The proposed hybrid method was less computationally complex than those based on the wrapper technique while preserving its relatively higher accuracy than the filter technique. Similarly, <xref ref-type="bibr" rid="B23">Butler-Yeoman et al. (2015)</xref> proposed a novel filter-wrapper hybrid feature selection algorithm that was based on particle swarm optimisation (FastPSO and RapidPSO). The authors further showed that the proposed hybrid method performs better than a pure filter algorithm (FilterPSO), while being less computationally complex than a pure wrapper algorithm (WrapperPSO).</p>
<p>Hybrid methods still have limitations despite their advantages when compared to purely filter, embedded, and wrapper methods. For example, relevant interacting SNPs with no significant individual effects (i.e., exclusively epistatic) can potentially be lost during the filtering step. This is because most filter methods cannot model feature-feature interactions. This can be mitigated by using filter algorithms that can model feature interactions (<xref ref-type="sec" rid="s2-1">Section 2.1</xref>).</p>
<sec id="s3-1">
<title>3.1 Integrative Method&#x2014;Incorporating External Knowledge to Limit Feature Search Space</title>
<p>Integrative methods incorporate biological knowledge as an <italic>a priori</italic> filter for SNP pre-selection (<xref ref-type="fig" rid="F5">Figure 5E</xref>). This enables the researcher to narrow the search space to &#x201c;interesting&#x201d; SNPs that are recognized as being relevant to the phenotype of interest. Limiting the search space means limiting the computational complexity for downstream analysis.</p>
<p>To integrate external knowledge, one can obtain information from public protein-protein interaction databases (e.g., IntAct, ChEMBLOR, BioGRID) or pathway databases (KEGG, Reactome). Software (e.g., INTERSNP (<xref ref-type="bibr" rid="B51">Herold et al., 2009</xref>)) has also been developed to help select a combination of &#x201c;interesting&#x201d; SNPs based on <italic>a priori</italic> knowledge (e.g., genomic location, pathway information, and statistical evidence). This information enables a reduction in the search space to only those SNPs that are mapped to genes that researchers contend are involved in relevant protein interactions or pathways of interest. For example, <xref ref-type="bibr" rid="B76">Ma et al. (2015)</xref> successfully identified SNP-SNP interactions that are associated with high-density lipoprotein cholesterol (HDL-C) levels. The search space was reduced by limiting the search to SNPs that have previously been associated with lipid levels, SNPs mapped to genes in known lipid-related pathways and those that are involved in relevant protein-protein interactions. In other examples, the SNP search space has been limited to SNPs that are located within known risk loci. For example, D&#x2019;Angelo et al. (<xref ref-type="bibr" rid="B30">D&#x2019;Angelo et al., 2009</xref>) identified significant gene-gene interactions that are associated with rheumatoid arthritis (RA) by restricting their search to chromosome 6 (a known risk locus for RA (<xref ref-type="bibr" rid="B85">Newton et al., 2004</xref>)) and using a combined LASSO-PCA approach.</p>
<p>An obvious limitation with these types of integrative approaches is the fact that online databases and our current biological knowledge are incomplete. Therefore, relying on external <italic>a priori</italic> knowledge will hinder the identification of novel variants outside our current biological understanding.</p>
</sec>
<sec id="s3-2">
<title>3.2 Ensemble Method&#x2014;Combining the Output of Different Feature Selections</title>
<p>Ensemble feature selection methods are based on the assumption that combining the output of multiple algorithms is better than using the output of a single algorithm (<xref ref-type="fig" rid="F6">Figure 6</xref>) (<xref ref-type="bibr" rid="B16">Bol&#xf3;n-Canedo et al., 2014</xref>). In theory, an ensemble of multiple feature selection methods allows the user to combine the strengths of the different methods while overcoming their weaknesses (<xref ref-type="bibr" rid="B95">Pes, 2020</xref>). This is possible because different feature selection algorithms can retain complementary but different information. Several studies have shown that ensemble feature selection methods tend to produce better classification accuracy than is achieved using single feature selection methods (<xref ref-type="bibr" rid="B104">Seijo-Pardo et al., 2015</xref>; <xref ref-type="bibr" rid="B54">Hoque et al., 2017</xref>; <xref ref-type="bibr" rid="B119">Wang et al., 2019</xref>; <xref ref-type="bibr" rid="B110">Tsai and Sung, 2020</xref>). Furthermore, ensemble feature selection can improve the stability of the selected feature set (i.e., it is more robust to small changes in the input data) (<xref ref-type="bibr" rid="B128">Yang and Mao, 2011</xref>). Stability and reproducibility of results is important because it increases the confidence of users when inferring knowledge from the selected features (<xref ref-type="bibr" rid="B100">Saeys et al., 2008</xref>).</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>
<bold>(A)</bold> Generalized illustration of ensemble methods. In ensemble methods, the outputs of several feature selection methods are aggregated to obtain the final selected features. FS &#x3d; feature selection. <bold>(B)</bold> Generalized illustration of majority voting system where the different generated feature subsets are used to train and test a specific classifier. The final output is the class predicted by the majority of the classifiers.</p>
</caption>
<graphic xlink:href="fbinf-02-927312-g006.tif"/>
</fig>
<p>When designing an ensemble approach, the first thing to consider is the choice of individual feature selection algorithms to be included. Using more than one feature selection method will increase the computation time, therefore filter and (to a lesser extent) embedded methods are usually preferred. By contrast, wrappers are generally avoided. Researchers must also make sure that the included algorithms will output diverse feature sets because there is no point in building an ensemble of algorithms that all produce the same results. Several metrics can be used to measure diversity (e.g. pair-wise Q statistics (<xref ref-type="bibr" rid="B67">Kuncheva et al., 2002</xref>)).</p>
<p>It is also important to consider how to combine the partial outputs generated by each algorithm into one final output; this is known as the aggregation method. Several aggregation methods have been proposed, the simplest works by taking the union or intersection of the top-ranked outputs of the different algorithms. While taking the intersection seems logical (i.e., if all algorithms select a feature, it might be highly relevant), this approach results in a restrictive set of features and tends to produce worse results than selecting the union (<xref ref-type="bibr" rid="B4">&#xc1;lvarez-Est&#xe9;vez et al., 2011</xref>). To overcome this, other popular aggregation methods assign each feature the mean or median position it has achieved among the outputs of all algorithms and use these positions to produce a final ranked feature subset. The final fusion rank of each feature can also be calculated as a weighted sum of the ranks assigned by the individual algorithms, where the weight of each algorithm is determined based on metrics such as the classification performance of the algorithm (<xref ref-type="bibr" rid="B71">Long et al., 2001</xref>). Alternatively, majority voting systems (<xref ref-type="bibr" rid="B15">Bol&#xf3;n-Canedo et al., 2012</xref>) (<xref ref-type="fig" rid="F6">Figure 6B</xref>) can be used to determine the final class prediction. In majority voting systems, the different feature subsets generated by each algorithm are used to train and test a specific classifier. The final predicted output is the class that is predicted by the majority of the classifiers (see (<xref ref-type="bibr" rid="B43">Guan et al., 2014</xref>; <xref ref-type="bibr" rid="B13">Bol&#xf3;n-Canedo and Alonso-Betanzos, 2019</xref>) for reviews about ensemble methods).</p>
<p>
<xref ref-type="bibr" rid="B115">Verma et al. (2018)</xref> proposed the use of a collective feature selection approach that combined the union of the top-ranked outputs of several feature selection methods (MDR, random forest, MultiSURF, TuRF). They applied this approach to identify SNPs associated with body mass index (BMI) and showed that the ensemble approach could detect epistatic effects that were otherwise missed using any single individual feature selection method.</p>
<p>
<xref ref-type="bibr" rid="B15">Bol&#xf3;n-Canedo et al. (2012)</xref> applied an ensemble of five filter methods (CFS, Consistency-based, INTERACT, Information Gain and ReliefF) to ten high dimensional microarray datasets. The authors demonstrated that the ensemble of five filter methods achieved the lowest average error for every classifier tested (C4.5, IB1, and na&#xef;ve Bayes) across all datasets, confirming the advantage of using the ensemble method over individual filters.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Exhaustive Searches for Higher-Order SNP-SNP Interactions</title>
<p>There are instances where scientists are mainly interested in inference, not prediction (e.g., the research interest lies in interpreting the biology of the selected SNPs). Recently, researchers within the GWAS field have recognized the importance of identifying significant SNP-SNP interactions, especially for complex diseases. The wrapper and embedded methods (e.g., decision tree-based algorithms) that can detect feature interactions (see <xref ref-type="sec" rid="s2-2">Section 2.2</xref>&#x2013;<xref ref-type="sec" rid="s2-3">2.3</xref>) have some limitations: 1). Despite modifications that enable epistasis detection (<xref ref-type="sec" rid="s2-3">Section 2.3</xref>), random forest-based algorithms are not exhaustive and are still prone to miss epistatic SNPs with low individual effects; 2) wrapper methods return a subset of features but do not identify which are relatively more important than others.</p>
<p>In theory, the most reliable (albeit na&#xef;ve) way to detect relevant SNP-SNP interactions is by exhaustively testing each possible SNP combination and how it might relate to the phenotype class. Indeed, several exhaustive filter methods have been proposed (see (<xref ref-type="bibr" rid="B26">Cordell, 2009</xref>; <xref ref-type="bibr" rid="B86">Niel et al., 2015</xref>)). Some examples include &#x201c;BOolean Operation-based Screening and Testing&#x201d; (BOOST), FastEpistasis (<xref ref-type="bibr" rid="B102">Sch&#xfc;pbach et al., 2010</xref>), and Tree-based Epistasis Association Mapping (TEAM) (<xref ref-type="bibr" rid="B132">Zhang et al., 2010</xref>). However, these methods are restricted to testing and identifying pair-wise SNP interactions. Therefore, any epistatic effects of &#x2265;3 orders will be missed. This contrasts with random forest (and many of its modifications), which despite its lower sensitivity (compared to exhaustive filters), can identify higher order interactions.</p>
<p>For higher-order interactions, exhaustive filter methods have been developed (e.g., Multifactor Dimensionality Reduction (MDR) (<xref ref-type="bibr" rid="B97">Ritchie et al., 2001</xref>) or the Combinatorial Partitioning Method (CPM) (<xref ref-type="bibr" rid="B84">Nelson et al., 2001</xref>)) and shown to be able to detect SNP-SNP interactions across &#x2265;3 orders. However, due to the computational complexity of these analyses, these methods are effectively constrained to a maximum of several hundred features and they cannot be applied to genome-wide datasets (<xref ref-type="bibr" rid="B73">Lou et al., 2007</xref>). Goudey et al. (<xref ref-type="bibr" rid="B39">Goudey et al., 2015</xref>) estimated that evaluating all three-way interactions in a GWAS dataset of 1.1 Million SNPs could take up to 5&#xa0;years even on a parallelized computing server with approximately 262,000 cores.</p>
<p>The application of exhaustive methods to genome-wide data can be achieved using an extended hybrid approach (i.e., applying a filter method as a first step, followed by an exhaustive search), or an integrative approach (incorporating external knowledge) that reduces the search space for the exhaustive methods (<xref ref-type="bibr" rid="B93">Pattin and Moore, 2008</xref>). For example, Greene et al. (<xref ref-type="bibr" rid="B41">Greene et al., 2009</xref>) recommended the use of SURF (a Relief-based filter algorithm) as a filter before using MDR to exhaustively search for relevant SNP interactions. <xref ref-type="bibr" rid="B25">Collins et al. (2013)</xref> used MDR to identify significant three-way SNP interactions that are associated with tuberculosis from a dataset of 19 SNPs mapped to candidate tuberculosis genes. Similarly, algorithms that incorporate two-stage strategies to detect high-order interactions have been developed (e.g., dynamic clustering for high-order genome-wide epistatic interactions detecting (DCHE) (<xref ref-type="bibr" rid="B44">Guo et al., 2014</xref>) and the epistasis detector based on the clustering of relatively frequent items (EDCF) (<xref ref-type="bibr" rid="B125">Xie et al., 2012</xref>)). DCHE and EDCF work by first identifying significant pair-wise interactions and using them as candidates to search for high-order interactions. More recently, swarm intelligence search algorithms have been proposed as an alternative way to look for candidate higher-order feature interactions, prior to application of an exhaustive search strategy. For example, <xref ref-type="bibr" rid="B111">Tuo et al. (2020)</xref> proposed the use of multipopulation harmony search algorithm to identify candidate <italic>k</italic>-order SNP interactions to reduce computation load before applying MDR to verify the interactions. Notably, the multi-stage algorithm (MP-HS-DHSI) that Tuo et al. 
developed is scalable to high-dimensional datasets (&#x3e;100,000 SNPs), much less computationally demanding than purely exhaustive searches, and is sensitive enough to detect interactions where the individual SNPs have no individual effects (<xref ref-type="bibr" rid="B111">Tuo et al., 2020</xref>).</p>
<p>Despite being time demanding, the exhaustive search for pair-wise SNP interaction is possible (<xref ref-type="bibr" rid="B81">Marchini et al., 2005</xref>). However, exhaustive searches for higher-order interactions are not yet available. Researchers must resort to hybrid, integrative, or two-stage approaches to reduce the feature space prior to exhaustive search (<xref ref-type="table" rid="T2">Table 2</xref>). Several (non-exhaustive) embedded methods (e.g., approaches based on decision tree algorithms) have been proposed as viable options to identify SNP interactions and increase the predictive power of the resulting information. However, the need for an efficient and scalable algorithm to detect SNP-SNP interactions remains, especially for higher-order interactions.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Summary of algorithms reviewed to detect epistasis along with datasets applications, computational time, and memory requirements. Data are taken from three comparative studies, each of which is colour coded differently. N/A, not available.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="center">Algorithm/software</th>
<th align="center">Exhaustive search ?</th>
<th align="center">Detects Higher-order Interaction ?</th>
<th align="center">Dataset</th>
<th align="center">No. SNPs</th>
<th align="center">Time</th>
<th align="center">Mem</th>
<th align="center">References</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="left">Filter (multivariate)</td>
<td align="center">BOOST</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td rowspan="3" align="center">Colorectal cancer SNPs (CORRECT study)</td>
<td align="center">253,657</td>
<td align="center">5&#xa0;h</td>
<td align="center">N/A</td>
<td rowspan="3" align="center">
<xref ref-type="bibr" rid="B57">Kafaie et al. (2021)</xref>
</td>
</tr>
<tr>
<td align="center">FastEpistasis</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">253,657</td>
<td align="center">98.5&#xa0;h</td>
<td align="center">N/A</td>
</tr>
<tr>
<td align="center">TEAM</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">253,657</td>
<td align="center">271&#xa0;h</td>
<td align="center">N/A</td>
</tr>
<tr>
<td rowspan="2" align="left">Filter (multivariate)</td>
<td align="center">MDR (pair-wise)</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td rowspan="6" align="center">Obesity SNPs (MyCode DiscovEHR study)</td>
<td align="center">100,000</td>
<td align="center">25&#xa0;h</td>
<td align="center">10&#xa0;Gb</td>
<td rowspan="6" align="center">
<xref ref-type="bibr" rid="B115">Verma et al. (2018)</xref>
</td>
</tr>
<tr>
<td align="center">MultiSURF &#x2b; TURF</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">100,000</td>
<td align="center">2.3&#xa0;h</td>
<td align="center">28&#xa0;Gb</td>
</tr>
<tr>
<td rowspan="4" align="left">Embedded (Decision tree-based)</td>
<td rowspan="2" align="center">Random Forest (Ranger R package)</td>
<td rowspan="2" align="center">No</td>
<td rowspan="2" align="center">Yes</td>
<td align="center">100,000</td>
<td align="center">Not feasible</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">500</td>
<td align="center">11.4&#xa0;min</td>
<td align="center">8&#xa0;Gb</td>
</tr>
<tr>
<td rowspan="2" align="center">Gradient Boosting</td>
<td rowspan="2" align="center">No</td>
<td rowspan="2" align="center">Yes</td>
<td align="center">100,000</td>
<td align="center">Not feasible</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">500</td>
<td align="center">7.8&#xa0;min</td>
<td align="center">8&#xa0;Gb</td>
</tr>
<tr>
<td rowspan="3" align="left">Filter (multivariate)</td>
<td rowspan="2" align="center">MDR (up to 5 order interactions)</td>
<td rowspan="2" align="center">Yes</td>
<td rowspan="2" align="center">Yes</td>
<td rowspan="3" align="center">WTCCC&#x2014;T1D</td>
<td align="center">2,184</td>
<td align="center">Not feasible</td>
<td align="center">&#x2014;</td>
<td rowspan="8" align="center">
<xref ref-type="bibr" rid="B120">Wei and Lu (2014)</xref>
</td>
</tr>
<tr>
<td align="center">20</td>
<td align="center">2&#xa0;min</td>
<td align="center">56&#xa0;Mb</td>
</tr>
<tr>
<td align="center">BOOST</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="center">2,184</td>
<td align="center">14&#xa0;s</td>
<td align="center">5&#xa0;Mb</td>
</tr>
<tr>
<td rowspan="5" align="left">Embedded (Decision tree-based)</td>
<td align="center">Random Jungle</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">WTCCC&#x2014;T1D</td>
<td align="center">2,184</td>
<td align="center">12&#xa0;min</td>
<td align="center">110&#xa0;Mb</td>
</tr>
<tr>
<td rowspan="2" align="center">GWGGI-TAMW</td>
<td rowspan="2" align="center">No</td>
<td rowspan="2" align="center">Yes</td>
<td align="center">WTCCC&#x2014;T1D</td>
<td align="center">2,184</td>
<td align="center">3&#xa0;min</td>
<td align="center">7&#xa0;Mb</td>
</tr>
<tr>
<td align="center">WTCCC&#x2014;CAD</td>
<td align="center">459,000</td>
<td align="center">10&#xa0;h</td>
<td align="center">738&#xa0;Mb</td>
</tr>
<tr>
<td rowspan="2" align="center">GWGGI-LRMW</td>
<td rowspan="2" align="center">No</td>
<td rowspan="2" align="center">Yes</td>
<td align="center">WTCCC&#x2014;T1D</td>
<td align="center">2,184</td>
<td align="center">1.5&#xa0;min</td>
<td align="center">7&#xa0;Mb</td>
</tr>
<tr>
<td align="center">WTCCC&#x2014;CAD</td>
<td align="center">459,000</td>
<td align="center">3.5&#xa0;h</td>
<td align="center">731&#xa0;Mb</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s5">
<title>5 Conclusion</title>
<p>Supervised ML algorithms can be applied to genome-wide SNP datasets. However, this is often not ideal because the curse of dimensionality leads to long training times and production of an overfitted predictive model. Therefore, the reduction of the total feature numbers to a more manageable level by selection of the most informative SNPs is essential before training the model.</p>
<p>Currently, no single feature selection method stands above the rest. Each method has its strengths and weaknesses (<xref ref-type="table" rid="T1">Table 1</xref>, <xref ref-type="table" rid="T3">Table 3</xref>, discussed in <xref ref-type="sec" rid="s2-4">Section 2.4</xref>). Indeed, it is becoming rarer for researchers to depend on just a single feature selection method. Therefore, we contend that the use of a two-stage approach or hybrid approach should be considered &#x201c;best practice.&#x201d; In a typical hybrid approach, a filter method is used in the first stage to reduce the number of candidate SNPs to a more manageable level, so that more complex and computationally heavy wrapper, embedded, or exhaustive search methods can be applied. Depending on the available resources, the filter used should be multivariate and able to detect feature interactions. Alternatively, biological knowledge can be used as an <italic>a priori</italic> filter for SNP pre-selection. Multiple feature selection methods can also be combined in a parallel scheme (ensemble method). By exploiting strengths of the different methods, ensemble methods allow better accuracy and stability than relying on any single feature selection method.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Advantages, limitations, and references for the feature selection algorithms reviewed in this paper.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="center">Algorithms/software</th>
<th align="center">Advantages</th>
<th align="center">Limitations</th>
<th align="center">References</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="13" align="left">Filter (multivariate)</td>
<td rowspan="2" align="left">MIFS, mRMR, CMIM, FCBF</td>
<td align="left">- Can remove redundant features</td>
<td align="left">- Ignores feature interaction</td>
<td rowspan="2" align="left">
<xref ref-type="bibr" rid="B9">Battiti, (1994)</xref>, <xref ref-type="bibr" rid="B94">Peng et al. (2005)</xref>, <xref ref-type="bibr" rid="B131">Yu and Liu (2004)</xref>, <xref ref-type="bibr" rid="B101">Schlittgen, (2011)</xref>
</td>
</tr>
<tr>
<td align="left">- Can be used for high-dimensional data</td>
<td align="left">- Not exhaustive</td>
</tr>
<tr>
<td rowspan="2" align="left">FS-RRC, CMIFSI</td>
<td align="left">- Can detect pair-wise feature interaction</td>
<td rowspan="2" align="left">- Not exhaustive</td>
<td rowspan="2" align="left">
<xref ref-type="bibr" rid="B70">Liang et al. (2019)</xref>, <xref ref-type="bibr" rid="B68">Li et al. (2020)</xref>
</td>
</tr>
<tr>
<td align="left">- Can remove redundant features</td>
</tr>
<tr>
<td rowspan="2" align="left">BOOST, FastEpistasis, TEAM</td>
<td align="left">- Performs exhaustive search</td>
<td align="left">- Cannot remove redundant features</td>
<td rowspan="2" align="left">
<xref ref-type="bibr" rid="B102">Sch&#xfc;pbach et al. (2010)</xref>, <xref ref-type="bibr" rid="B118">Wan et al. (2010)</xref>, <xref ref-type="bibr" rid="B132">Zhang et al. (2010)</xref>
</td>
</tr>
<tr>
<td align="left">- Can detect pair-wise feature interaction</td>
<td align="left">- Computationally expensive (relative to non-exhaustive filters)</td>
</tr>
<tr>
<td rowspan="2" align="left">Relief-based Algorithms: Relief, ReliefF, TURF, SURF, SURF&#x2a;, MultiSURF, MultiSURF&#x2a;</td>
<td align="left">- Can detect pair-wise feature interactions</td>
<td align="left">- Not exhaustive</td>
<td rowspan="2" align="left">
<xref ref-type="bibr" rid="B58">Kira and Rendell, (1992)</xref>, <xref ref-type="bibr" rid="B63">Kononenko, (1994)</xref>, <xref ref-type="bibr" rid="B83">Moore and White, (2007)</xref>, (<xref ref-type="bibr" rid="B41">Greene et al., 2009</xref>), <xref ref-type="bibr" rid="B40">Granizo-Mackenzie and Moore, (2013)</xref>, <xref ref-type="bibr" rid="B42">Greene et al. (2010)</xref>, <xref ref-type="bibr" rid="B114">Urbanowicz et al. (2018a)</xref>
</td>
</tr>
<tr>
<td align="left">- Some algorithms (ReliefF, MultiSURF) can detect higher-order interactions</td>
<td align="left">- Cannot remove redundant features</td>
</tr>
<tr>
<td rowspan="2" align="left">MDR, CPM</td>
<td align="left">- Performs exhaustive search</td>
<td rowspan="2" align="left">- Computationally very expensive for higher-order interactions (Cannot be applied to high-dimensional data)</td>
<td rowspan="2" align="left">
<xref ref-type="bibr" rid="B97">Ritchie et al. (2001)</xref>, <xref ref-type="bibr" rid="B84">Nelson et al. (2001)</xref>
</td>
</tr>
<tr>
<td align="left">- Can detect higher-order interactions</td>
</tr>
<tr>
<td rowspan="3" align="left">DCHE, EDCF</td>
<td align="left">- Performs exhaustive search</td>
<td rowspan="3" align="left">-Potentially lose feature interactions that do not have significant pair-wise effect</td>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B125">Xie et al. (2012)</xref>, <xref ref-type="bibr" rid="B44">Guo et al. (2014)</xref>
</td>
</tr>
<tr>
<td align="left">- Can detect higher-order interactions</td>
</tr>
<tr>
<td align="left">- Can remove redundant features</td>
</tr>
<tr>
<td rowspan="5" align="left">Embedded</td>
<td rowspan="2" align="left">Random Jungle, GWGGI</td>
<td align="left">- Can detect higher-order interactions</td>
<td align="left">- Not exhaustive</td>
<td rowspan="2" align="left">
<xref ref-type="bibr" rid="B103">Schwarz et al. (2010)</xref>, <xref ref-type="bibr" rid="B120">Wei and Lu, (2014)</xref>
</td>
</tr>
<tr>
<td align="left">- Feature selection and prediction model are made simultaneously</td>
<td align="left">- Cannot remove redundant features</td>
</tr>
<tr>
<td rowspan="3" align="left">T-Tree</td>
<td align="left">- Can detect higher-order interactions</td>
<td rowspan="3" align="left">- Not exhaustive</td>
<td rowspan="3" align="left">
<xref ref-type="bibr" rid="B18">Botta et al. (2014)</xref>
</td>
</tr>
<tr>
<td align="left">- Feature selection and prediction model are made simultaneously</td>
</tr>
<tr>
<td align="left">- Can remove redundant features</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</body>
<back>
<sec id="s6">
<title>Author Contributions</title>
<p>NP conceived and wrote the review. TF, AK, and JOS conceived and commented on the review.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>NP received a University of Auckland PhD Scholarship. TF and JOS were funded by a grant from the Dines Family Foundation.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abraham</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Inouye</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Genomic Risk Prediction of Complex Human Disease and its Clinical Application</article-title>. <source>Curr. Opin. Genet. Dev.</source> <volume>33</volume>, <fpage>10</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1016/j.gde.2015.06.005</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abramovich</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Benjamini</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Donoho</surname>
<given-names>D. L.</given-names>
</name>
<name>
<surname>Johnstone</surname>
<given-names>I. M.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Adapting to Unknown Sparsity by Controlling the False Discovery Rate</article-title>. <source>Ann. Stat.</source> <volume>34</volume>, <fpage>584</fpage>&#x2013;<lpage>653</lpage>. <pub-id pub-id-type="doi">10.1214/009053606000000074</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Altshuler</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Daly</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Lander</surname>
<given-names>E. S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Genetic Mapping in Human Disease</article-title>. <source>Science</source> <volume>322</volume>, <fpage>881</fpage>&#x2013;<lpage>888</lpage>. <pub-id pub-id-type="doi">10.1126/science.1156409</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>&#xc1;lvarez-Est&#xe9;vez</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Maro&#xf1;o</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Alonso-Betanzos</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Moret-Bonillo</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Reducing Dimensionality in a Database of Sleep EEG Arousals</article-title>. <source>Expert Syst. Appl.</source> <volume>38</volume>, <fpage>7746</fpage>&#x2013;<lpage>7754</lpage>. </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alzubi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ramzan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Alzoubi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Amira</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A Hybrid Feature Selection Method for Complex Diseases SNPs</article-title>. <source>IEEE Access</source> <volume>6</volume>, <fpage>1292</fpage>&#x2013;<lpage>1301</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2017.2778268</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aphinyanaphongs</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>L. D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Peskin</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>Efstathiadis</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Aliferis</surname>
<given-names>C. F.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>A Comprehensive Empirical Comparison of Modern Supervised Classification and Feature Selection Methods for Text Categorization</article-title>. <source>J. Assn Inf. Sci. Tec.</source> <volume>65</volume>, <fpage>1964</fpage>&#x2013;<lpage>1987</lpage>. <pub-id pub-id-type="doi">10.1002/asi.23110</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashley</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Butte</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Wheeler</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Klein</surname>
<given-names>T. E.</given-names>
</name>
<name>
<surname>Dewey</surname>
<given-names>F. E.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Clinical Assessment Incorporating a Personal Genome</article-title>. <source>Lancet</source> <volume>375</volume>, <fpage>1525</fpage>&#x2013;<lpage>1535</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(10)60452-7</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barrera-G&#xf3;mez</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Agier</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Portengen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chadeau-Hyam</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Giorgis-Allemand</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Siroux</surname>
<given-names>V.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>A Systematic Comparison of Statistical Methods to Detect Interactions in Exposome-Health Associations</article-title>. <source>Environ. Heal. A Glob. Access Sci. Source</source> <volume>16</volume>, <fpage>74</fpage>. <pub-id pub-id-type="doi">10.1186/s12940-017-0277-6</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Battiti</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>Using Mutual Information for Selecting Features in Supervised Neural Net Learning</article-title>. <source>IEEE Trans. Neural Netw.</source> <volume>5</volume>, <fpage>537</fpage>&#x2013;<lpage>550</lpage>. <pub-id pub-id-type="doi">10.1109/72.298224</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benjamini</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hochberg</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing</article-title>. <source>J. R. Stat. Soc. Ser. B Methodol.</source> <volume>57</volume>, <fpage>289</fpage>&#x2013;<lpage>300</lpage>. <pub-id pub-id-type="doi">10.1111/j.2517-6161.1995.tb02031.x</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benjamini</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yekutieli</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>The Control of the False Discovery Rate in Multiple Testing under Dependency</article-title>. <source>Ann. Stat.</source> <volume>29</volume>, <fpage>1165</fpage>&#x2013;<lpage>1188</lpage>. <pub-id pub-id-type="doi">10.1214/aos/1013699998</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bins</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Draper</surname>
<given-names>B. A.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Feature Selection from Huge Feature Sets</article-title>. <source>Proc. IEEE Int. Conf. Comput. Vis.</source> <volume>2</volume>, <fpage>159</fpage>&#x2013;<lpage>165</lpage>. <pub-id pub-id-type="doi">10.1109/ICCV.2001.937619</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bol&#xf3;n-Canedo</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Alonso-Betanzos</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Ensembles for Feature Selection: A Review and Future Trends</article-title>. <source>Inf. Fusion</source> <volume>52</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2018.11.008</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bol&#xf3;n-Canedo</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Maro&#xf1;o</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Alonso-Betanzos</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>A Review of Feature Selection Methods on Synthetic Data</article-title>. <source>Knowl. Inf. Syst.</source> <volume>34</volume>, <fpage>483</fpage>&#x2013;<lpage>519</lpage>. </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bol&#xf3;n-Canedo</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Maro&#xf1;o</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Alonso-Betanzos</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>An Ensemble of Filters and Classifiers for Microarray Data Classification</article-title>. <source>Pattern Recognit.</source> <volume>45</volume>, <fpage>531</fpage>&#x2013;<lpage>539</lpage>. </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bol&#xf3;n-Canedo</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Maro&#xf1;o</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Alonso-Betanzos</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ben&#xed;tez</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Herrera</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A Review of Microarray Datasets and Applied Feature Selection Methods</article-title>. <source>Inf. Sci. (Ny)</source> <volume>282</volume>, <fpage>111</fpage>&#x2013;<lpage>135</lpage>. </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bommert</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Bischl</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Rahnenf&#xfc;hrer</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lang</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Benchmark for Filter Methods for Feature Selection in High-Dimensional Classification Data</article-title>. <source>Comput. Statistics Data Analysis</source> <volume>143</volume>, <fpage>106839</fpage>. <pub-id pub-id-type="doi">10.1016/j.csda.2019.106839</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Botta</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Louppe</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Geurts</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wehenkel</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Exploiting SNP Correlations within Random Forest for Genome-wide Association Studies</article-title>. <source>PLoS One</source> <volume>9</volume>, <fpage>e93379</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0093379</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Breiman</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Random Forests</article-title>. <source>Mach. Learn.</source> <volume>45</volume>, <fpage>5</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1023/a:1010933404324</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Broekema</surname>
<given-names>R. V.</given-names>
</name>
<name>
<surname>Bakker</surname>
<given-names>O. B.</given-names>
</name>
<name>
<surname>Jonkers</surname>
<given-names>I. H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A Practical View of Fine-Mapping and Gene Prioritization in the Post-genome-wide Association Era</article-title>. <source>Open Biol.</source> <volume>10</volume>, <fpage>190221</fpage>. <pub-id pub-id-type="doi">10.1098/rsob.190221</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brzyski</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Peterson</surname>
<given-names>C. B.</given-names>
</name>
<name>
<surname>Sobczyk</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Cand&#xe8;s</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Bogdan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sabatti</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Controlling the Rate of GWAS False Discoveries</article-title>. <source>Genetics</source> <volume>205</volume>, <fpage>61</fpage>&#x2013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1534/genetics.116.193987</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bureau</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dupuis</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Falls</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lunetta</surname>
<given-names>K. L.</given-names>
</name>
<name>
<surname>Hayward</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Keith</surname>
<given-names>T. P.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>Identifying SNPs Predictive of Phenotype Using Random Forests</article-title>. <source>Genet. Epidemiol.</source> <volume>28</volume>, <fpage>171</fpage>&#x2013;<lpage>182</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.20041</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Butler-Yeoman</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Particle Swarm Optimisation for Feature Selection: A Hybrid Filter-Wrapper Approach</article-title>,&#x201d; in <conf-name>2015 IEEE Congress on Evolutionary Computation (CEC)</conf-name>, <conf-loc>Sendai, Japan</conf-loc>, <conf-date>25-28 May 2015</conf-date>, <fpage>2428</fpage>&#x2013;<lpage>2435</lpage>. <pub-id pub-id-type="doi">10.1109/CEC.2015.7257186</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chandrashekar</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sahin</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A Survey on Feature Selection Methods</article-title>. <source>Comput. Electr. Eng.</source> <volume>40</volume>, <fpage>16</fpage>&#x2013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1016/j.compeleceng.2013.11.024</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Collins</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wejse</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Sirugo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Multifactor Dimensionality Reduction Reveals a Three-Locus Epistatic Interaction Associated with Susceptibility to Pulmonary Tuberculosis</article-title>. <source>BioData Min.</source> <volume>6</volume>, <fpage>4</fpage>&#x2013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.1186/1756-0381-6-4</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cordell</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Detecting Gene-Gene Interactions that Underlie Human Diseases</article-title>. <source>Nat. Rev. Genet.</source> <volume>10</volume>, <fpage>392</fpage>&#x2013;<lpage>404</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2579</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Couronn&#xe9;</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Probst</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Boulesteix</surname>
<given-names>A.-L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Random Forest versus Logistic Regression: a Large-Scale Benchmark Experiment</article-title>. <source>BMC Bioinforma.</source> <volume>19</volume>, <fpage>270</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-018-2264-5</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cueto-L&#xf3;pez</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Garc&#xed;a-Ord&#xe1;s</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>D&#xe1;vila-Batista</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Moreno</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Aragon&#xe9;s</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Alaiz-Rodr&#xed;guez</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A Comparative Study on Feature Selection for a Risk Prediction Model for Colorectal Cancer</article-title>. <source>Comput. Methods Programs Biomed.</source> <volume>177</volume>, <fpage>219</fpage>&#x2013;<lpage>229</lpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2019.06.001</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Danasingh</surname>
<given-names>A. A. G. S.</given-names>
</name>
<name>
<surname>Subramanian</surname>
<given-names>A. a. B.</given-names>
</name>
<name>
<surname>Epiphany</surname>
<given-names>J. L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identifying Redundant Features Using Unsupervised Learning for High-Dimensional Data</article-title>. <source>SN Appl. Sci.</source> <volume>2</volume>, <fpage>1367</fpage>. <pub-id pub-id-type="doi">10.1007/s42452-020-3157-6</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>D&#x2019;Angelo</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>C. C.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Combining Least Absolute Shrinkage and Selection Operator (LASSO) and Principal-Components Analysis for Detection of Gene-Gene Interactions in Genome-wide Association Studies</article-title>. <source>BMC Proc.</source> <volume>3</volume>, <fpage>S62</fpage>. <pub-id pub-id-type="doi">10.1186/1753-6561-3-S7-S62</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bush</surname>
<given-names>W. S.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Bioinformatics Challenges in Genome-wide Association Studies (Gwas)</article-title>. <source>Methods Mol. Biol.</source> <volume>1168</volume>, <fpage>63</fpage>&#x2013;<lpage>81</lpage>. <pub-id pub-id-type="doi">10.1007/978-1-4939-0847-9_5</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Donnelly</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Progress and Challenges in Genome-wide Association Studies in Humans</article-title>. <source>Nature</source> <volume>456</volume>, <fpage>728</fpage>&#x2013;<lpage>731</lpage>. <pub-id pub-id-type="doi">10.1038/nature07631</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dudbridge</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Gusnanto</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Estimation of Significance Thresholds for Genomewide Association Scans</article-title>. <source>Genet. Epidemiol.</source> <volume>32</volume>, <fpage>227</fpage>&#x2013;<lpage>234</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.20297</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dunn</surname>
<given-names>O. J.</given-names>
</name>
</person-group> (<year>1961</year>). <article-title>Multiple Comparisons Among Means</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>56</volume>, <fpage>52</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.1961.10482090</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Farcomeni</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>A Review of Modern Multiple Hypothesis Testing, with Particular Attention to the False Discovery Proportion</article-title>. <source>Stat. Methods Med. Res.</source> <volume>17</volume>, <fpage>347</fpage>&#x2013;<lpage>388</lpage>. <pub-id pub-id-type="doi">10.1177/0962280206079046</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Forman</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>An Extensive Empirical Study of Feature Selection Metrics for Text Classification</article-title>. <source>J. Mach. Learn. Res.</source> <volume>3</volume>, <fpage>1289</fpage>&#x2013;<lpage>1305</lpage>. <pub-id pub-id-type="doi">10.5555/944919.944974</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Forsati</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Moayedikia</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jensen</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Shamsfard</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Meybodi</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Enriched Ant Colony Optimization and its Application in Feature Selection</article-title>. <source>Neurocomputing</source> <volume>142</volume>, <fpage>354</fpage>&#x2013;<lpage>371</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2014.03.053</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghosh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Guha</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sarkar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Abraham</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A Wrapper-Filter Feature Selection Technique Based on Ant Colony Optimization</article-title>. <source>Neural Comput. Applic</source> <volume>32</volume>, <fpage>7839</fpage>&#x2013;<lpage>7857</lpage>. <pub-id pub-id-type="doi">10.1007/s00521-019-04171-3</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goudey</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Abedini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hopper</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Inouye</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Makalic</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>D. F.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>High Performance Computing Enabling Exhaustive Analysis of Higher Order Single Nucleotide Polymorphism Interaction in Genome Wide Association Studies</article-title>. <source>Health Inf. Sci. Syst.</source> <volume>3</volume>, <fpage>S3</fpage>. <pub-id pub-id-type="doi">10.1186/2047-2501-3-S1-S3</pub-id> </citation>
</ref>
<ref id="B40">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Granizo-Mackenzie</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Multiple Threshold Spatially Uniform ReliefF for the Genetic Analysis of Complex Human Diseases</article-title>,&#x201d; in <source>Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)</source> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <volume>7833</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-642-37189-9_1</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Greene</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Penrod</surname>
<given-names>N. M.</given-names>
</name>
<name>
<surname>Kiralis</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Spatially Uniform ReliefF (SURF) for Computationally-Efficient Filtering of Gene-Gene Interactions</article-title>. <source>BioData Min.</source> <volume>2</volume>, <fpage>5</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1186/1756-0381-2-5</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Greene</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Himmelstein</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Kiralis</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>The Informative Extremes: Using Both Nearest and Farthest Individuals Can Improve Relief Algorithms in the Domain of Human Genetics</article-title>,&#x201d; in <source>Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)</source> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <volume>6023</volume>, <fpage>182</fpage>&#x2013;<lpage>193</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-642-12211-8_16</pub-id> </citation>
</ref>
<ref id="B43">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Guan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>Y. K.</given-names>
</name>
<name>
<surname>Najeebullah</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Rasel</surname>
<given-names>M. K.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>A Review of Ensemble Learning Based Feature Selection</article-title>,&#x201d; in <source>IETE Technical Review</source> (<publisher-loc>India</publisher-loc>: <publisher-name>Institution of Electronics and Telecommunication Engineers</publisher-name>), <volume>31</volume>, <fpage>190</fpage>&#x2013;<lpage>198</lpage>. <pub-id pub-id-type="doi">10.1080/02564602.2014.906859</pub-id> </citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Cloud Computing for Detecting High-Order Genome-wide Epistatic Interaction via Dynamic Clustering</article-title>. <source>BMC Bioinforma.</source> <volume>15</volume>, <fpage>102</fpage>&#x2013;<lpage>116</lpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-15-102</pub-id> </citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chung</surname>
<given-names>F.-L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Multi-Label Bioinformatics Data Classification with Ensemble Embedded Feature Selection</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>103863</fpage>&#x2013;<lpage>103875</lpage>. <pub-id pub-id-type="doi">10.1109/access.2019.2931035</pub-id> </citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guyon</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Elisseeff</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>An Introduction to Variable and Feature Selection</article-title>. <source>J. Mach. Learn. Res.</source> <volume>3</volume>, <fpage>1157</fpage>&#x2013;<lpage>1182</lpage>. <pub-id pub-id-type="doi">10.5555/944919.944968</pub-id> </citation>
</ref>
<ref id="B47">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Guyon</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Gunn</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Nikravesh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zadeh</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2008</year>). <source>Feature Extraction: Foundations and Applications</source>, <volume>207</volume>. <publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer</publisher-name>. </citation>
</ref>
<ref id="B48">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hall</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2000</year>). &#x201c;<article-title>Correlation-based Feature Selection of Discrete and Numeric Class Machine Learning</article-title>,&#x201d; in <conf-name>Proceedings of the Seventeenth International Conference on Machine Learning (ICML 2000)</conf-name>, <conf-loc>Stanford University, Stanford, CA, USA</conf-loc>, <conf-date>June 29 - July 2, 2000</conf-date>. </citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X. W.</given-names>
</name>
<name>
<surname>Talebizadeh</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Genetic Studies of Complex Human Diseases: Characterizing SNP-Disease Associations Using Bayesian Networks</article-title>. <source>BMC Syst. Biol.</source> <volume>6 Suppl 3</volume>, <fpage>S14</fpage>. <pub-id pub-id-type="doi">10.1186/1752-0509-6-S3-S14</pub-id> </citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hayes-Roth</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>1975</year>). <article-title>Review of "Adaptation in Natural and Artificial Systems by John H. Holland", the U. Of Michigan Press, 1975</article-title>. <source>SIGART Bull.</source> <volume>53</volume>, <fpage>15</fpage>. <pub-id pub-id-type="doi">10.1145/1216504.1216510</pub-id> </citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Herold</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Steffens</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Brockschmidt</surname>
<given-names>F. F.</given-names>
</name>
<name>
<surname>Baur</surname>
<given-names>M. P.</given-names>
</name>
<name>
<surname>Becker</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>INTERSNP: Genome-wide Interaction Analysis Guided by A Priori Information</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>3275</fpage>&#x2013;<lpage>3281</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp596</pub-id> </citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hindorff</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Sethupathy</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Junkins</surname>
<given-names>H. A.</given-names>
</name>
<name>
<surname>Ramos</surname>
<given-names>E. M.</given-names>
</name>
<name>
<surname>Mehta</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Collins</surname>
<given-names>F. S.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Potential Etiologic and Functional Implications of Genome-wide Association Loci for Human Diseases and Traits</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>106</volume>, <fpage>9362</fpage>&#x2013;<lpage>9367</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0903103106</pub-id> </citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ho</surname>
<given-names>D. S. W.</given-names>
</name>
<name>
<surname>Schierding</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wake</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Saffery</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>O&#x27;Sullivan</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Machine Learning SNP Based Prediction for Precision Medicine</article-title>. <source>Front. Genet.</source> <volume>10</volume>, <fpage>267</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2019.00267</pub-id> </citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoque</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bhattacharyya</surname>
<given-names>D. K.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>EFS-MI: an Ensemble Feature Selection Method for Classification</article-title>. <source>Complex Intell. Syst.</source> <volume>4</volume>, <fpage>105</fpage>&#x2013;<lpage>118</lpage>. <pub-id pub-id-type="doi">10.1007/s40747-017-0060-x</pub-id> </citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Inza</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Larra&#xf1;aga</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Blanco</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Cerrolaza</surname>
<given-names>A. J.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Filter versus Wrapper Gene Selection Approaches in DNA Microarray Domains</article-title>. <source>Artif. Intell. Med.</source> <volume>31</volume>, <fpage>91</fpage>&#x2013;<lpage>103</lpage>. <pub-id pub-id-type="doi">10.1016/j.artmed.2004.01.007</pub-id> </citation>
</ref>
<ref id="B56">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>John</surname>
<given-names>G. H.</given-names>
</name>
<name>
<surname>Kohavi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pfleger</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>1994</year>). &#x201c;<article-title>Irrelevant Features and the Subset Selection Problem</article-title>,&#x201d; in <source>Machine Learning Proceedings 1994</source> (<publisher-loc>Burlington, MA</publisher-loc>: <publisher-name>Morgan Kaufmann Publishers</publisher-name>), <volume>121&#x2013;129</volume>, <fpage>121</fpage>&#x2013;<lpage>129</lpage>. <pub-id pub-id-type="doi">10.1016/b978-1-55860-335-6.50023-4</pub-id> </citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kafaie</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Statistical Methods with Exhaustive Search in the Identification of Gene-Gene Interactions for Colorectal Cancer</article-title>. <source>Genet. Epidemiol.</source> <volume>45</volume>, <fpage>222</fpage>&#x2013;<lpage>234</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.22372</pub-id> </citation>
</ref>
<ref id="B58">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kira</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Rendell</surname>
<given-names>L. A.</given-names>
</name>
</person-group> (<year>1992</year>). &#x201c;<article-title>Feature Selection Problem: Traditional Methods and a New Algorithm</article-title>,&#x201d; in <source>Proceedings Tenth National Conference on Artificial Intelligence</source> <volume>2</volume>, <fpage>129</fpage>&#x2013;<lpage>134</lpage>. </citation>
</ref>
<ref id="B59">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kittler</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1978</year>). &#x201c;<article-title>Feature Set Search Algorithms</article-title>,&#x201d; in <source>Pattern Recognition and Signal Processing</source>. <publisher-loc>Dordrecht, Netherlands</publisher-loc>: <publisher-name>Springer Dordrecht</publisher-name>, <fpage>41</fpage>&#x2013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1007/978-94-009-9941-1_3</pub-id> </citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kohavi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>John</surname>
<given-names>G. H.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Wrappers for Feature Subset Selection</article-title>. <source>Artif. Intell.</source> <volume>97</volume>, <fpage>273</fpage>&#x2013;<lpage>324</lpage>. <pub-id pub-id-type="doi">10.1016/s0004-3702(97)00043-x</pub-id> </citation>
</ref>
<ref id="B61">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Koller</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sahami</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1996</year>). &#x201c;<article-title>Toward Optimal Feature Selection</article-title>,&#x201d; in <source>International Conference on Machine Learning</source>. <publisher-loc>Stanford, CA</publisher-loc>: <publisher-name>Stanford InfoLab</publisher-name>, <fpage>284</fpage>&#x2013;<lpage>292</lpage>. </citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>K&#xf6;nig</surname>
<given-names>I. R.</given-names>
</name>
<name>
<surname>Auerbach</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gola</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Held</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Holzinger</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>Legault</surname>
<given-names>M. A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Machine Learning and Data Mining in Complex Genomic Data - A Review on the Lessons Learned in Genetic Analysis Workshop 19</article-title>. <source>BMC Genet.</source> <volume>17</volume>, <fpage>1</fpage>. <comment>BioMed Central</comment>. <pub-id pub-id-type="doi">10.1186/s12863-015-0315-8</pub-id> </citation>
</ref>
<ref id="B63">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kononenko</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>1994</year>). &#x201c;<article-title>Estimating Attributes: Analysis and Extensions of RELIEF</article-title>,&#x201d; in <source>Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)</source> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <volume>784</volume>, <fpage>171</fpage>&#x2013;<lpage>182</lpage>. <pub-id pub-id-type="doi">10.1007/3-540-57868-4_57</pub-id> </citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kotzyba&#x2010;Hibert</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kapfer</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Goeldner</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Recent Trends in Photoaffinity Labeling</article-title>. <source>Angewandte Chemie Int. Ed. Engl.</source> <volume>34</volume>, <fpage>1296</fpage>&#x2013;<lpage>1312</lpage>. </citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kruppa</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ziegler</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>K&#xf6;nig</surname>
<given-names>I. R.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Risk Estimation and Risk Prediction Using Machine-Learning Methods</article-title>. <source>Hum. Genet.</source> <volume>131</volume>, <fpage>1639</fpage>&#x2013;<lpage>1654</lpage>. <pub-id pub-id-type="doi">10.1007/s00439-012-1194-y</pub-id> </citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kubus</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>The Problem of Redundant Variables in Random Forests</article-title>. <source>Folia Oeconomica</source> <volume>6</volume>, <fpage>7</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.18778/0208-6018.339.01</pub-id> </citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kuncheva</surname>
<given-names>L. I.</given-names>
</name>
<name>
<surname>Skurichina</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Duin</surname>
<given-names>R. P. W.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>An Experimental Study on Diversity for Bagging and Boosting with Linear Classifiers</article-title>. <source>Inf. Fusion</source> <volume>3</volume>, <fpage>245</fpage>&#x2013;<lpage>258</lpage>. <pub-id pub-id-type="doi">10.1016/s1566-2535(02)00093-3</pub-id> </citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A New Feature Selection Algorithm Based on Relevance, Redundancy and Complementarity</article-title>. <source>Comput. Biol. Med.</source> <volume>119</volume>, <fpage>103667</fpage>. <comment>Elsevier</comment>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2020.103667</pub-id> </citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Umbach</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Terry</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Taylor</surname>
<given-names>J. A.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Application of the GA/KNN Method to SELDI Proteomics Data</article-title>. <source>Bioinformatics</source> <volume>20</volume>, <fpage>1638</fpage>&#x2013;<lpage>1640</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bth098</pub-id> </citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Luan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Feature Selection with Conditional Mutual Information Considering Feature Interaction</article-title>. <source>Symmetry</source> <volume>11</volume>, <fpage>858</fpage>. <pub-id pub-id-type="doi">10.3390/sym11070858</pub-id> </citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Long</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Mangalam</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Chan</surname>
<given-names>B. Y.</given-names>
</name>
<name>
<surname>Tolleri</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hatfield</surname>
<given-names>G. W.</given-names>
</name>
<name>
<surname>Baldi</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Improved Statistical Inference from DNA Microarray Data Using Analysis of Variance and A Bayesian Statistical Framework. Analysis of Global Gene Expression in <italic>Escherichia coli</italic> K12</article-title>. <source>J. Biol. Chem.</source> <volume>276</volume>, <fpage>19937</fpage>&#x2013;<lpage>19944</lpage>. <pub-id pub-id-type="doi">10.1074/jbc.M010192200</pub-id> </citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>L&#xf3;pez</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Torrent-Fontbona</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Vi&#xf1;as</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez-Real</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Single Nucleotide Polymorphism Relevance Learning with Random Forests for Type 2 Diabetes Risk Prediction</article-title>. <source>Artif. Intell. Med.</source> <volume>85</volume>, <fpage>43</fpage>&#x2013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1016/j.artmed.2017.09.005</pub-id> </citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lou</surname>
<given-names>X. Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>G. B.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>J. Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Elston</surname>
<given-names>R. C.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>A Generalized Combinatorial Approach for Detecting Gene-By-Gene and Gene-By-Environment Interactions with Application to Nicotine Dependence</article-title>. <source>Am. J. Hum. Genet.</source> <volume>80</volume> (<issue>6</issue>), <fpage>1125</fpage>&#x2013;<lpage>1137</lpage>. <comment>Elsevier</comment>. <pub-id pub-id-type="doi">10.1086/518312</pub-id> </citation>
</ref>
<ref id="B74">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Louppe</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wehenkel</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sutera</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Geurts</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Understanding Variable Importances in Forests of Randomized Trees</article-title>,&#x201d; in <source>Advances in Neural Information Processing Systems</source> <volume>26</volume>. </citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lunetta</surname>
<given-names>K. L.</given-names>
</name>
<name>
<surname>Hayward</surname>
<given-names>L. B.</given-names>
</name>
<name>
<surname>Segal</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>van Eerdewegh</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Screening Large-Scale Association Study Data: Exploiting Interactions Using Random Forests</article-title>. <source>BMC Genet.</source> <volume>5</volume>, <fpage>32</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2156-5-32</pub-id> </citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Keinan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Clark</surname>
<given-names>A. G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Biological Knowledge-Driven Analysis of Epistasis in Human GWAS with Application to Lipid Traits</article-title>. <source>Methods Mol. Biol.</source> <volume>1253</volume>, <fpage>35</fpage>&#x2013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1007/978-1-4939-2155-3_3</pub-id> </citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maher</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Personal Genomes: The Case of the Missing Heritability</article-title>. <source>Nature</source> <volume>456</volume>, <fpage>18</fpage>&#x2013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1038/456018a</pub-id> </citation>
</ref>
<ref id="B78">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Makowsky</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pajewski</surname>
<given-names>N. M.</given-names>
</name>
<name>
<surname>Klimentidis</surname>
<given-names>Y. C.</given-names>
</name>
<name>
<surname>Vazquez</surname>
<given-names>A. I.</given-names>
</name>
<name>
<surname>Duarte</surname>
<given-names>C. W.</given-names>
</name>
<name>
<surname>Allison</surname>
<given-names>D. B.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Beyond Missing Heritability: Prediction of Complex Traits</article-title>. <source>PLoS Genet.</source> <volume>7</volume>, <fpage>e1002051</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1002051</pub-id> </citation>
</ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manolio</surname>
<given-names>T. A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Bringing Genome-wide Association Findings into Clinical Use</article-title>. <source>Nat. Rev. Genet.</source> <volume>14</volume>, <fpage>549</fpage>&#x2013;<lpage>558</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3523</pub-id> </citation>
</ref>
<ref id="B80">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A Wrapper Feature Subset Selection Method Based on Randomized Search and Multilayer Structure</article-title>. <source>Biomed. Res. Int.</source> <volume>2019</volume>, <fpage>9864213</fpage>. <pub-id pub-id-type="doi">10.1155/2019/9864213</pub-id> </citation>
</ref>
<ref id="B81">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marchini</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Donnelly</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Cardon</surname>
<given-names>L. R.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Genome-wide Strategies for Detecting Multiple Loci that Influence Complex Diseases</article-title>. <source>Nat. Genet.</source> <volume>37</volume>, <fpage>413</fpage>&#x2013;<lpage>417</lpage>. <pub-id pub-id-type="doi">10.1038/ng1537</pub-id> </citation>
</ref>
<ref id="B82">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mohri</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rostamizadeh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Talwalkar</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Foundations of Machine Learning</source>. <publisher-loc>Cambridge, MA</publisher-loc>: <publisher-name>MIT Press</publisher-name>. </citation>
</ref>
<ref id="B83">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Moore</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>White</surname>
<given-names>B. C.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>Tuning ReliefF for Genome-wide Genetic Analysis</article-title>,&#x201d; in <source>Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)</source> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <volume>4447</volume>, <fpage>166</fpage>&#x2013;<lpage>175</lpage>. </citation>
</ref>
<ref id="B84">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nelson</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Kardia</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Ferrell</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Sing</surname>
<given-names>C. F.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>A Combinatorial Partitioning Method to Identify Multilocus Genotypic Partitions that Predict Quantitative Trait Variation</article-title>. <source>Genome Res.</source> <volume>11</volume>, <fpage>458</fpage>&#x2013;<lpage>470</lpage>. <pub-id pub-id-type="doi">10.1101/gr.172901</pub-id> </citation>
</ref>
<ref id="B85">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Newton</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Harney</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Wordsworth</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>A Review of the MHC Genetics of Rheumatoid Arthritis</article-title>. <source>Genes. Immun.</source> <volume>5</volume>, <fpage>151</fpage>&#x2013;<lpage>157</lpage>. <pub-id pub-id-type="doi">10.1038/sj.gene.6364045</pub-id> </citation>
</ref>
<ref id="B86">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Niel</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Sinoquet</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dina</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rocheleau</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A Survey about Methods Dedicated to Epistasis Detection</article-title>. <source>Front. Genet.</source> <volume>6</volume>, <fpage>285</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2015.00285</pub-id> </citation>
</ref>
<ref id="B87">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Okser</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pahikkala</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Airola</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Salakoski</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ripatti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Aittokallio</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Regularized Machine Learning in the Genetic Prediction of Complex Traits</article-title>. <source>PLoS Genet.</source> <volume>10</volume>, <fpage>e1004754</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1004754</pub-id> </citation>
</ref>
<ref id="B88">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Okser</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pahikkala</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Aittokallio</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Genetic Variants and Their Interactions in Disease Risk Prediction - Machine Learning and Network Perspectives</article-title>. <source>BioData Min.</source> <volume>6</volume>, <fpage>5</fpage>. <pub-id pub-id-type="doi">10.1186/1756-0381-6-5</pub-id> </citation>
</ref>
<ref id="B89">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Onengut-Gumuscu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>W. M.</given-names>
</name>
<name>
<surname>Burren</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Cooper</surname>
<given-names>N. J.</given-names>
</name>
<name>
<surname>Quinlan</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Mychaleckyj</surname>
<given-names>J. C.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Fine Mapping of Type 1 Diabetes Susceptibility Loci and Evidence for Colocalization of Causal Variants with Lymphoid Gene Enhancers</article-title>. <source>Nat. Genet.</source> <volume>47</volume>, <fpage>381</fpage>&#x2013;<lpage>386</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3245</pub-id> </citation>
</ref>
<ref id="B90">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ooka</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Johno</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Nakamoto</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yoda</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yokomichi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yamagata</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Random Forest Approach for Determining Risk Prediction and Predictive Factors of Type 2 Diabetes: Large-Scale Health Check-Up Data in Japan</article-title>. <source>Bmjnph</source> <volume>4</volume>, <fpage>140</fpage>&#x2013;<lpage>148</lpage>. <pub-id pub-id-type="doi">10.1136/bmjnph-2020-000200</pub-id> </citation>
</ref>
<ref id="B91">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pal</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Foody</surname>
<given-names>G. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Feature Selection for Classification of Hyperspectral Data by SVM</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>48</volume>, <fpage>2297</fpage>&#x2013;<lpage>2307</lpage>. <pub-id pub-id-type="doi">10.1109/tgrs.2009.2039484</pub-id> </citation>
</ref>
<ref id="B92">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Panagiotou</surname>
<given-names>O. A.</given-names>
</name>
<name>
<surname>Ioannidis</surname>
<given-names>J. P.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>What Should the Genome-wide Significance Threshold Be? Empirical Replication of Borderline Genetic Associations</article-title>. <source>Int. J. Epidemiol.</source> <volume>41</volume>, <fpage>273</fpage>&#x2013;<lpage>286</lpage>. <pub-id pub-id-type="doi">10.1093/ije/dyr178</pub-id> </citation>
</ref>
<ref id="B93">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pattin</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Exploiting the Proteome to Improve the Genome-wide Genetic Analysis of Epistasis in Common Human Diseases</article-title>. <source>Hum. Genet.</source> <volume>124</volume>, <fpage>19</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1007/s00439-008-0522-8</pub-id> </citation>
</ref>
<ref id="B94">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Feature Selection Based on Mutual Information: Criteria of Max-Dependency, Max-Relevance, and Min-Redundancy</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>27</volume>, <fpage>1226</fpage>&#x2013;<lpage>1238</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2005.159</pub-id> </citation>
</ref>
<ref id="B95">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pes</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Ensemble Feature Selection for High-Dimensional Data: a Stability Analysis across Multiple Domains</article-title>. <source>Neural Comput. Applic</source> <volume>32</volume>, <fpage>5951</fpage>&#x2013;<lpage>5973</lpage>. <pub-id pub-id-type="doi">10.1007/s00521-019-04082-3</pub-id> </citation>
</ref>
<ref id="B96">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Remeseiro</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bolon-Canedo</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A Review of Feature Selection Methods in Medical Applications</article-title>. <source>Comput. Biol. Med.</source> <volume>112</volume>, <fpage>103375</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2019.103375</pub-id> </citation>
</ref>
<ref id="B97">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ritchie</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Hahn</surname>
<given-names>L. W.</given-names>
</name>
<name>
<surname>Roodi</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Bailey</surname>
<given-names>L. R.</given-names>
</name>
<name>
<surname>Dupont</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Parl</surname>
<given-names>F. F.</given-names>
</name>
<etal/>
</person-group> (<year>2001</year>). <article-title>Multifactor-dimensionality Reduction Reveals High-Order Interactions Among Estrogen-Metabolism Genes in Sporadic Breast Cancer</article-title>. <source>Am. J. Hum. Genet.</source> <volume>69</volume>, <fpage>138</fpage>&#x2013;<lpage>147</lpage>. <pub-id pub-id-type="doi">10.1086/321276</pub-id> </citation>
</ref>
<ref id="B98">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Romagnoni</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>J&#xe9;gou</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Van Steen</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wainrib</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Hugot</surname>
<given-names>J. P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Comparative Performances of Machine Learning Methods for Classifying Crohn Disease Patients Using Genome-wide Genotyping Data</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>10351</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-019-46649-z</pub-id> </citation>
</ref>
<ref id="B99">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saeys</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Inza</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Larra&#xf1;aga</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>A Review of Feature Selection Techniques in Bioinformatics</article-title>. <source>Bioinformatics</source> <volume>23</volume>, <fpage>2507</fpage>&#x2013;<lpage>2517</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btm344</pub-id> </citation>
</ref>
<ref id="B100">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Saeys</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Abeel</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Van De Peer</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Robust Feature Selection Using Ensemble Feature Selection Techniques</article-title>,&#x201d; in <source>Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)</source> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <volume>5212</volume>, <fpage>313</fpage>&#x2013;<lpage>325</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-540-87481-2_21</pub-id> </citation>
</ref>
<ref id="B101">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schlittgen</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>A Weighted Least-Squares Approach to Clusterwise Regression</article-title>. <source>AStA Adv. Stat. Anal.</source> <volume>95</volume>, <fpage>205</fpage>&#x2013;<lpage>217</lpage>. <pub-id pub-id-type="doi">10.1007/s10182-011-0155-4</pub-id> </citation>
</ref>
<ref id="B102">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sch&#xfc;pbach</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Xenarios</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Bergmann</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kapur</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>FastEpistasis: a High Performance Computing Solution for Quantitative Trait Epistasis</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>1468</fpage>&#x2013;<lpage>1469</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq147</pub-id> </citation>
</ref>
<ref id="B103">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schwarz</surname>
<given-names>D. F.</given-names>
</name>
<name>
<surname>K&#xf6;nig</surname>
<given-names>I. R.</given-names>
</name>
<name>
<surname>Ziegler</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>On Safari to Random Jungle: a Fast Implementation of Random Forests for High-Dimensional Data</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>1752</fpage>&#x2013;<lpage>1758</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq257</pub-id> </citation>
</ref>
<ref id="B104">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Seijo-Pardo</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bol&#xf3;n-Canedo</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Porto-D&#xed;az</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Alonso-Betanzos</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Ensemble Feature Selection for Rankings of Features</article-title>,&#x201d; in <source>Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer-Verlag</publisher-name>), <volume>9095</volume>, <fpage>29</fpage>&#x2013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-19222-2_3</pub-id> </citation>
</ref>
<ref id="B105">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Signorino</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Kirchner</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Using LASSO to Model Interactions and Nonlinearities in Survey Data</article-title>. <source>Surv. Pract.</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.29115/sp-2018-0005</pub-id> </citation>
</ref>
<ref id="B106">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Skalak</surname>
<given-names>D. B.</given-names>
</name>
</person-group> (<year>1994</year>). &#x201c;<article-title>Prototype and Feature Selection by Sampling and Random Mutation Hill Climbing Algorithms</article-title>,&#x201d; in <source>Machine Learning Proceedings 1994</source>. <publisher-loc>Burlington, MA</publisher-loc>: <publisher-name>Morgan Kaufmann</publisher-name>, <fpage>293</fpage>&#x2013;<lpage>301</lpage>. <pub-id pub-id-type="doi">10.1016/b978-1-55860-335-6.50043-x</pub-id> </citation>
</ref>
<ref id="B107">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Spain</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Barrett</surname>
<given-names>J. C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Strategies for Fine-Mapping Complex Traits</article-title>. <source>Hum. Mol. Genet.</source> <volume>24</volume>, <fpage>R111</fpage>&#x2013;<lpage>R119</lpage>. <pub-id pub-id-type="doi">10.1093/hmg/ddv260</pub-id> </citation>
</ref>
<ref id="B108">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Spiegel</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Hawkins</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>&#x27;Personalized Medicine&#x27; to Identify Genetic Risks for Type 2 Diabetes and Focus Prevention: Can it Fulfill its Promise?</article-title> <source>Health Aff. (Millwood)</source> <volume>31</volume>, <fpage>43</fpage>&#x2013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1377/hlthaff.2011.1054</pub-id> </citation>
</ref>
<ref id="B109">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Szymczak</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Biernacka</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Cordell</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Gonz&#xe1;lez-Recio</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>K&#xf6;nig</surname>
<given-names>I. R.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Machine Learning in Genome-wide Association Studies</article-title>. <source>Genet. Epidemiol.</source> <volume>33 Suppl 1</volume>, <fpage>S51</fpage>&#x2013;<lpage>S57</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.20473</pub-id> </citation>
</ref>
<ref id="B110">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tsai</surname>
<given-names>C.-F.</given-names>
</name>
<name>
<surname>Sung</surname>
<given-names>Y.-T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Ensemble Feature Selection in High Dimension, Low Sample Size Datasets: Parallel and Serial Combination Approaches</article-title>. <source>Knowledge-Based Syst.</source> <volume>203</volume>, <fpage>106097</fpage>. <pub-id pub-id-type="doi">10.1016/j.knosys.2020.106097</pub-id> </citation>
</ref>
<ref id="B111">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tuo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Multipopulation Harmony Search Algorithm for the Detection of High-Order SNP Interactions</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>4389</fpage>&#x2013;<lpage>4398</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa215</pub-id> </citation>
</ref>
<ref id="B112">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Uddin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hossain</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Moni</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Comparing Different Supervised Machine Learning Algorithms for Disease Prediction</article-title>. <source>BMC Med. Inf. Decis. Mak.</source> <volume>19</volume>, <fpage>281</fpage>. <pub-id pub-id-type="doi">10.1186/s12911-019-1004-8</pub-id> </citation>
</ref>
<ref id="B113">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Urbanowicz</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Meeker</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>La Cava</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Olson</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2018b</year>). <article-title>Relief-based Feature Selection: Introduction and Review</article-title>. <source>J. Biomed. Inf.</source> <volume>85</volume>, <fpage>189</fpage>&#x2013;<lpage>203</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2018.07.014</pub-id> </citation>
</ref>
<ref id="B114">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Urbanowicz</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Olson</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Schmitt</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Meeker</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>2018a</year>). <article-title>Benchmarking Relief-Based Feature Selection Methods for Bioinformatics Data Mining</article-title>. <source>J. Biomed. Inf.</source> <volume>85</volume>, <fpage>168</fpage>&#x2013;<lpage>188</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2018.07.015</pub-id> </citation>
</ref>
<ref id="B115">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Verma</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Lucas</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Veturi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Dudek</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Collective Feature Selection to Identify Crucial Epistatic Variants</article-title>. <source>BioData Min.</source> <volume>11</volume>, <fpage>5</fpage>. <pub-id pub-id-type="doi">10.1186/s13040-018-0168-6</pub-id> </citation>
</ref>
<ref id="B116">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Visscher</surname>
<given-names>P. M.</given-names>
</name>
<name>
<surname>Wray</surname>
<given-names>N. R.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Sklar</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>McCarthy</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>M. A.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>10 Years of GWAS Discovery: Biology, Function, and Translation</article-title>. <source>Am. J. Hum. Genet.</source> <volume>101</volume>, <fpage>5</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2017.06.005</pub-id> </citation>
</ref>
<ref id="B117">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wah</surname>
<given-names>Y. B.</given-names>
</name>
<name>
<surname>Ibrahim</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hamid</surname>
<given-names>H. A.</given-names>
</name>
<name>
<surname>Abdul-Rahman</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Fong</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Feature Selection Methods: Case of Filter and Wrapper Approaches for Maximising Classification Accuracy</article-title>. <source>Pertanika J. Sci. Technol.</source> <volume>26</volume>, <fpage>329</fpage>&#x2013;<lpage>340</lpage>. </citation>
</ref>
<ref id="B118">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>N. L. S.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>BOOST: A Fast Approach to Detecting Gene-Gene Interactions in Genome-wide Case-Control Studies</article-title>. <source>Am. J. Hum. Genet.</source> <volume>87</volume> (<issue>3</issue>), <fpage>325</fpage>&#x2013;<lpage>340</lpage>. <comment>Elsevier</comment>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2010.07.021</pub-id> </citation>
</ref>
<ref id="B119">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>An Ensemble Feature Selection Method for High-Dimensional Data Based on Sort Aggregation</article-title>. <source>Syst. Sci. Control Eng.</source> <volume>7</volume>, <fpage>32</fpage>&#x2013;<lpage>39</lpage>. <pub-id pub-id-type="doi">10.1080/21642583.2019.1620658</pub-id> </citation>
</ref>
<ref id="B120">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>GWGGI: Software for Genome-wide Gene-Gene Interaction Analysis</article-title>. <source>BMC Genet.</source> <volume>15</volume>, <fpage>101</fpage>. <pub-id pub-id-type="doi">10.1186/s12863-014-0101-z</pub-id> </citation>
</ref>
<ref id="B121">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Bradfield</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cardinale</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Frackelton</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Large Sample Size, Wide Variant Spectrum, and Advanced Machine-Learning Technique Boost Risk Prediction for Inflammatory Bowel Disease</article-title>. <source>Am. J. Hum. Genet.</source> <volume>92</volume>, <fpage>1008</fpage>&#x2013;<lpage>1012</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2013.05.002</pub-id> </citation>
</ref>
<ref id="B122">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Winham</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Colby</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Freimuth</surname>
<given-names>R. R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>de Andrade</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Huebner</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>SNP Interaction Detection with Random Forests in High-Dimensional Genetic Data</article-title>. <source>BMC Bioinforma.</source> <volume>13</volume>, <fpage>164</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-13-164</pub-id> </citation>
</ref>
<ref id="B123">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wolpert</surname>
<given-names>D. H.</given-names>
</name>
<name>
<surname>Macready</surname>
<given-names>W. G.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>No Free Lunch Theorems for Optimization</article-title>. <source>IEEE Trans. Evol. Comput.</source> <volume>1</volume>, <fpage>67</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1109/4235.585893</pub-id> </citation>
</ref>
<ref id="B124">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wray</surname>
<given-names>N. R.</given-names>
</name>
<name>
<surname>Goddard</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Visscher</surname>
<given-names>P. M.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Prediction of Individual Genetic Risk to Disease from Genome-wide Association Studies</article-title>. <source>Genome Res.</source> <volume>17</volume>, <fpage>1520</fpage>&#x2013;<lpage>1528</lpage>. <pub-id pub-id-type="doi">10.1101/gr.6665407</pub-id> </citation>
</ref>
<ref id="B125">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xie</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Detecting Genome-wide Epistases Based on the Clustering of Relatively Frequent Items</article-title>. <source>Bioinformatics</source> <volume>28</volume>, <fpage>5</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr603</pub-id> </citation>
</ref>
<ref id="B126">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Biomarker Identification by Feature Wrappers</article-title>. <source>Genome Res.</source> <volume>11</volume>, <fpage>1878</fpage>&#x2013;<lpage>1887</lpage>. <pub-id pub-id-type="doi">10.1101/gr.190001</pub-id> </citation>
</ref>
<ref id="B127">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tachmazidou</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Walter</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ciampi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zeggini</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Greenwood</surname>
<given-names>C. M. T.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Estimating Genome&#x2010;Wide Significance for Whole&#x2010;Genome Sequencing Studies</article-title>. <source>Genet. Epidemiol.</source> <volume>38</volume>, <fpage>281</fpage>&#x2013;<lpage>290</lpage>. <comment>Wiley Online Libr.</comment> <pub-id pub-id-type="doi">10.1002/gepi.21797</pub-id> </citation>
</ref>
<ref id="B128">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Mao</surname>
<given-names>K. Z.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Robust Feature Selection for Microarray Data Based on Multicriterion Fusion</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>8</volume>, <fpage>1080</fpage>&#x2013;<lpage>1092</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2010.103</pub-id> </citation>
</ref>
<ref id="B129">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Honavar</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>Feature Subset Selection Using a Genetic Algorithm</article-title>. <source>IEEE Intell. Syst.</source> <volume>13</volume>, <fpage>44</fpage>&#x2013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1109/5254.671091</pub-id> </citation>
</ref>
<ref id="B130">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yoshida</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Koike</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>SNPInterForest: a New Method for Detecting Epistatic Interactions</article-title>. <source>BMC Bioinforma.</source> <volume>12</volume>, <fpage>469</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-12-469</pub-id> </citation>
</ref>
<ref id="B131">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Efficient Feature Selection via Analysis of Relevance and Redundancy</article-title>. <source>J. Mach. Learn. Res.</source> <volume>5</volume>, <fpage>1205</fpage>&#x2013;<lpage>1224</lpage>. <pub-id pub-id-type="doi">10.5555/1005332.1044700</pub-id> </citation>
</ref>
<ref id="B132">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>TEAM: Efficient Two-Locus Epistasis Tests in Human Genome-wide Association Study</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>i217</fpage>&#x2013;<lpage>i227</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq186</pub-id> </citation>
</ref>
<ref id="B133">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Divergence-based Feature Selection for Separate Classes</article-title>. <source>Neurocomputing</source> <volume>101</volume>, <fpage>32</fpage>&#x2013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2012.06.036</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>