<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1009428</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2022.1009428</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Simultaneous detection of novel genes and SNPs by adaptive <italic>p</italic>-value combination</article-title>
<alt-title alt-title-type="left-running-head">Chen et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2022.1009428">10.3389/fgene.2022.1009428</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Xiaohui</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1941037/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Hong</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1035250/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Ming</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2092928/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Deng</surname>
<given-names>Hong-Wen</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/23308/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wu</surname>
<given-names>Zheyang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/24577/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Mathematical Sciences</institution>, <institution>Worcester Polytechnic Institute</institution>, <addr-line>Worcester</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Translational Biomarker Statistics</institution>, <institution>Global Biometrics and Data Management</institution>, <institution>Pfizer Inc.</institution>, <addr-line>Cambridge</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Bioinformatics and Computational Biology Program</institution>, <institution>Worcester Polytechnic Institute</institution>, <addr-line>Worcester</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Division of Biomedical Informatics &#x26; Genomics</institution>, <institution>School of Medicine</institution>, <institution>Tulane University</institution>, <addr-line>New Orleans</addr-line>, <addr-line>LA</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1560675/overview">Ryan Sun</ext-link>, University of Texas MD Anderson Cancer Center, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1554594/overview">Yaowu Liu</ext-link>, Southwestern University of Finance and Economics, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1212064/overview">Zilin Li</ext-link>, Indiana University Bloomington, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2030128/overview">Il-Youp Kwak</ext-link>, Chung-Ang University, South Korea</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Zheyang Wu, <email>zheyangwu@wpi.edu</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Statistical Genetics and Methodology, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>17</day>
<month>11</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1009428</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>08</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>03</day>
<month>11</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Chen, Zhang, Liu, Deng and Wu.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Chen, Zhang, Liu, Deng and Wu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Combining SNP <italic>p</italic>-values from GWAS summary data is a promising strategy for detecting novel genetic factors. Existing statistical methods for the <italic>p</italic>-value-based SNP-set testing confront two challenges. First, the statistical power of different methods depends on unknown patterns of genetic effects that could drastically vary over different SNP sets. Second, they do not identify which SNPs primarily contribute to the global association of the whole set. We propose a new signal-adaptive analysis pipeline to address these challenges using the omnibus thresholding Fisher&#x2019;s method (oTFisher). The oTFisher remains robustly powerful over various patterns of genetic effects. Its adaptive thresholding can be applied to estimate important SNPs contributing to the overall significance of the given SNP set. We develop efficient calculation algorithms to control the type I error rate, which accounts for the linkage disequilibrium among SNPs. Extensive simulations show that the oTFisher has robustly high power and provides a higher balanced accuracy in screening SNPs than the traditional Bonferroni and FDR procedures. We applied the oTFisher to study the genetic association of genes and haplotype blocks of the bone density-related traits using the summary data of the Genetic Factors for Osteoporosis Consortium. The oTFisher identified more novel and literature-reported genetic factors than existing <italic>p</italic>-value combination methods. Relevant computation has been implemented into the R package TFisher to support similar data analysis.</p>
</abstract>
<kwd-group>
<kwd>GWAS summary statistics</kwd>
<kwd>SNP-set analysis</kwd>
<kwd>
<italic>p</italic>-value combination</kwd>
<kwd>Fisher&#x2019;s method</kwd>
<kwd>global hypothesis test</kwd>
<kwd>osteoporosis</kwd>
<kwd>bone density</kwd>
<kwd>genetic association</kwd>
</kwd-group>
<contract-num rid="cn001">DMS-2113570</contract-num>
<contract-sponsor id="cn001">National Science Foundation<named-content content-type="fundref-id">10.13039/100000001</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>GWAS summary data is an important resource for dissecting the genetics of complex traits. In contrast to the individual-level genotype and phenotype data, summary data allows much broader access because of less privacy risk (<xref ref-type="bibr" rid="B39">NIH, 2018</xref>). The summary statistics are often sufficient for typical genetic association studies with the same efficiency as individual-level data (<xref ref-type="bibr" rid="B28">Lin and Zeng, 2010a</xref>,<xref ref-type="bibr" rid="B29">b</xref>). Furthermore, it is convenient to integrate summary data from different studies, e.g., through meta-analysis, to cumulate information to increase the power of detecting genetic factors. Many summary data analyses have been carried out and resulted in new genetic findings (<xref ref-type="bibr" rid="B12">Evangelou and Ioannidis, 2013</xref>; <xref ref-type="bibr" rid="B41">Pasaniuc and Price, 2017</xref>; <xref ref-type="bibr" rid="B16">Guo and Wu, 2019</xref>).</p>
<p>GWAS summary data is often used to test the association between a trait and sets of SNPs in genes or other genomic segments. Such SNP-set tests can reveal weak genetic effects that are unidentifiable by individual SNPs (<xref ref-type="bibr" rid="B19">Hoh et al., 2001</xref>; <xref ref-type="bibr" rid="B55">Xiong et al., 2002</xref>; <xref ref-type="bibr" rid="B52">Wu et al., 2010</xref>; <xref ref-type="bibr" rid="B54">Wu et al., 2014</xref>; <xref ref-type="bibr" rid="B47">Sun et al., 2019</xref>; <xref ref-type="bibr" rid="B48">Sun and Lin, 2019</xref>). Many methods have been developed based on the combination of SNP statistics (e.g., z-scores) or their <italic>p</italic>-values. Combining the <italic>p</italic>-values has multiple advantages. The <italic>p</italic>-values are the direct measure of statistical significance. Combining them does not suffer from the signal cancellation that occurs when adding SNP z-scores of opposite directions (<xref ref-type="bibr" rid="B40">Pan, 2009</xref>). Furthermore, <italic>p</italic>-values are homogeneously Uniform (0, 1) distributed under the null as long as the statistics are continuous. Therefore, <italic>p</italic>-values from statistics of different types or scales can be directly combined.</p>
<p>The SNP-set test is essentially a global hypothesis testing procedure for detecting the existence of &#x201c;signals&#x201d; of genetic effects. Optimal signal-detection tests depend on the signal patterns (<xref ref-type="bibr" rid="B9">Donoho and Jin, 2004</xref>; <xref ref-type="bibr" rid="B59">Zhang et al., 2020a</xref>; <xref ref-type="bibr" rid="B61">Zhang et al., 2020b</xref>; <xref ref-type="bibr" rid="B62">Zhang and Wu, 2022a</xref>). For example, Fisher&#x2019;s method (<xref ref-type="bibr" rid="B13">Fisher, 1925</xref>) is optimal for detecting dense signals (e.g., in the sense of Bahadur efficiency (<xref ref-type="bibr" rid="B30">Littell and Folks, 1971</xref>, <xref ref-type="bibr" rid="B31">1973</xref>)). Meanwhile, the minimal <italic>p</italic>-value test is preferred for detecting sparse and strong signals (<xref ref-type="bibr" rid="B9">Donoho and Jin, 2004</xref>). In GWAS, signal patterns depend on the fraction of causal SNPs, the strength of their effects, the linkage disequilibrium (LD) among SNPs, and other potential factors (e.g., covariates) (<xref ref-type="bibr" rid="B62">Zhang and Wu, 2022a</xref>). The collective signal patterns are often unknown and drastically vary over different SNP sets. One strategy to address this issue is the omnibus testing procedure. An excellent approach is the ACAT-O, which includes three different tests, the ACAT, the SKAT, and the burden test (<xref ref-type="bibr" rid="B32">Liu et al., 2019</xref>). The ACAT is more powerful than SKAT and burden tests for sparse signals when the fraction of causal SNPs is small and the LDs are weak. On the contrary, SKAT and burden tests are more powerful for dense signals. The ACAT-O becomes robust by adapting to the power of these three tests. However, SKAT and burden tests are not <italic>p</italic>-value combination methods. 
The SKAT requires the marginal score statistics, which may not be provided in summary data (<xref ref-type="bibr" rid="B53">Wu et al., 2011</xref>).</p>
<p>We propose an adaptive <italic>p</italic>-value combination procedure based on the thresholding Fisher&#x2019;s method (TFisher) (<xref ref-type="bibr" rid="B61">Zhang et al., 2020b</xref>). The TFisher provides a flexible mechanism for truncating and weighting SNP <italic>p</italic>-values in the testing procedure. When signals are sparse, the TFisher statistic is powerful by including a few smallest <italic>p</italic>-values that are most likely associated with signals; when signals are dense, more <italic>p</italic>-values can be included to improve power. Therefore, the corresponding omnibus testing procedure (the oTFisher) retains robustly high power for various signal patterns by automatically adapting to a subset of important SNPs. Unlike the ACAT-O, which involves different types of test statistics, the oTFisher restricts to the same family of statistics. The adaptation is through truncating and weighting SNP <italic>p</italic>-values, which provides a vehicle for screening important SNPs. If the SNP-set is significantly associated, the important SNPs selected by oTFisher are likely trait relevant. This feature is useful for two reasons. First, important SNP screening based on the SNP-set test could help to identify SNPs with weak genetic effects because the SNP-set test has the potential to detect the totality of genetic effects that single-SNP analysis cannot. Second, the important SNPs that drive the association of a SNP set, e.g., a gene, could help reveal genetic architecture, disease mechanism, and other downstream analyses of the gene.</p>
<p>The exact distribution of oTFisher is challenging to obtain when SNP <italic>p</italic>-values are dependent because of the LD among SNPs. For controlling the type I error rate, we could rely on a re-sampling-based strategy to get the empirical <italic>p</italic>-value of the oTFisher. However, this strategy is computationally expensive, especially for moderate to large SNP sets. We design an efficient algorithm to calculate the <italic>p</italic>-value of the TFisher and the oTFisher. It is a hybrid of the generalization of Brown&#x2019;s method (GB) (<xref ref-type="bibr" rid="B3">Brown, 1975</xref>) and a more advanced skewness-kurtosis-ratio matching method (SKRM) (<xref ref-type="bibr" rid="B63">Zhang and Wu, 2022b</xref>; <xref ref-type="bibr" rid="B60">Zhang et al., 2022</xref>). The GB is fast and reasonably accurate for larger <italic>p</italic>-values (&#x2265;0.01). The SKRM can significantly improve calculation accuracy for smaller <italic>p</italic>-values.</p>
<p>The oTFisher is shown to be robustly powerful through extensive simulations. The type I error rate is adequately controlled even at a stringent significance level. We applied the oTFisher to analyze the summary data from the Genetic Factors for Osteoporosis Consortium (GEFOS). The oTFisher systematically identified more literature-reported disease genes than the current <italic>p</italic>-value combination methods. The results contributed more insights into the genetics of osteoporosis.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 SNP-set testing statistics</title>
<p>Let a set of <italic>n</italic> SNPs have <italic>p</italic>-values <italic>P</italic>
<sub>
<italic>i</italic>
</sub>, <italic>i</italic> &#x3d; 1, <italic>&#x2026;</italic> , <italic>n</italic>. The TFisher statistic tests the genetic association between a trait and the SNP set by combining these <italic>p</italic>-values while allowing for a general scheme of truncation and weighting (<xref ref-type="bibr" rid="B61">Zhang et al., 2020b</xref>):<disp-formula id="e1">
<mml:math id="m1">
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
<mml:mi>I</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>where <italic>I</italic> () is the indicator function, <italic>&#x3c4;</italic>
<sub>1</sub> &#x3e; 0 is a truncation parameter that includes <italic>p</italic>-values equal to or smaller than <italic>&#x3c4;</italic>
<sub>1</sub> into the statistic, and <italic>&#x3c4;</italic>
<sub>2</sub> &#x3e; 0 is a weighting parameter for selected <italic>p</italic>-values. When <italic>&#x3c4;</italic>
<sub>1</sub> &#x3d; <italic>&#x3c4;</italic>
<sub>2</sub> &#x3d; 1, the TFisher statistic combines all <italic>p</italic>-values, which is the classic Fisher&#x2019;s combination statistic: <inline-formula id="inf1">
<mml:math id="m2">
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>. The TFisher family includes the statistic of the truncation-product method (TPM) (<xref ref-type="bibr" rid="B58">Zaykin et al., 2002</xref>, <xref ref-type="bibr" rid="B57">2007</xref>): <inline-formula id="inf2">
<mml:math id="m3">
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, i.e., a special case of the TFisher with <italic>&#x3c4;</italic>
<sub>2</sub> &#x3d; 1. Our previous study has shown that statistical power and computation efficiency can be improved by weighting the truncated <italic>p</italic>-values through <italic>&#x3c4;</italic>
<sub>2</sub>. An optimality can be reached at <italic>&#x3c4;</italic>
<sub>1</sub> &#x3d; <italic>&#x3c4;</italic>
<sub>2</sub> &#x3d; <italic>&#x3c4;</italic> &#x2208; (0, 1], which gives the soft-thresholding statistic:<disp-formula id="equ1">
<mml:math id="m4">
<mml:msubsup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mi>max</mml:mi>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
</disp-formula>An analogous version of the TPM is the rank-truncation product (RTP) method (<xref ref-type="bibr" rid="B10">Dudbridge and Koeleman, 2003</xref>). Let <italic>P</italic>
<sub>(1)</sub> &#x2264; &#x22ef; &#x2264; <italic>P</italic>
<sub>(<italic>n</italic>)</sub> be the ordered input <italic>p</italic>-values. The RTP statistic is <inline-formula id="inf3">
<mml:math id="m5">
<mml:mi>R</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> for some predetermined <italic>k</italic>. The RTP statistic can also be written in consistency with the TPM with <italic>&#x3c4;</italic>
<sub>1</sub> &#x3d; <italic>P</italic>
<sub>(<italic>k</italic>)</sub>. Calculating the <italic>p</italic>-value of the RTP is more challenging, especially when SNP <italic>p</italic>-values are dependent due to the LD.</p>
<p>The TFisher is a flexible framework to maximize the detection of SNP-set associations over a broad spectrum of signal patterns. Different signal patterns are in favor of different truncating and weighting parameters. For example, when association signals are dense, more SNP <italic>p</italic>-values should be included in the test statistic by large <italic>&#x3c4;</italic>
<sub>1</sub> and <italic>&#x3c4;</italic>
<sub>2</sub> so that the test is closer to Fisher&#x2019;s method. Dense signals happen under the polygenic model with a substantial number of causal SNPs, or when the LD is strong so that many SNPs in LD with the causal SNPs also show association signals. On the other hand, if association signals are sparse (i.e., only a small number of SNP <italic>p</italic>-values are linked to the causal genetic factor), the smallest SNP <italic>p</italic>-values should be included in the statistic by small <italic>&#x3c4;</italic>
<sub>1</sub> and <italic>&#x3c4;</italic>
<sub>2</sub>.</p>
<p>In reality, the signal patterns are often unknown and substantially differ over traits and loci. Therefore, we rely on the data-adaptive omnibus testing procedure to automatically select appropriate parameters. Specifically, we consider a discrete search domain over {(<italic>&#x3c4;</italic>
<sub>1<italic>k</italic>
</sub>, <italic>&#x3c4;</italic>
<sub>2<italic>k</italic>
</sub>), <italic>k</italic> &#x3d; 1, <italic>&#x2026;</italic> , <italic>K</italic>}, where <italic>K</italic> is the total number of <italic>&#x3c4;</italic> values to search on. Denote <italic>P</italic>(<italic>k</italic>) the test <italic>p</italic>-value of <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1<italic>k</italic>
</sub>, <italic>&#x3c4;</italic>
<sub>2<italic>k</italic>
</sub>). The omnibus statistic is defined as the smallest <italic>P</italic>(<italic>k</italic>), which indicates the maximal association evidence for the whole SNP-set:<disp-formula id="e2">
<mml:math id="m6">
<mml:mtext>oTFisher_minp</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>P</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(2)</label>
</disp-formula>Moreover, we define a second omnibus test by Cauchy combination test (CCT) of <italic>P</italic>(<italic>k</italic>)&#x2019;s (<xref ref-type="bibr" rid="B33">Liu and Xie, 2020</xref>):<disp-formula id="e3">
<mml:math id="m7">
<mml:mtext>oTFisher_cct</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mi>tan</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>0.5</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(3)</label>
</disp-formula>The summands of oTFisher_cct are the transformation of <italic>P</italic>(<italic>k</italic>)&#x2019;s by the inverse cumulative distribution function (CDF) of the standard Cauchy distribution. Because of the heavy tail of Cauchy distribution, oTFisher_cct is dominated by <italic>p</italic>-values closer to 0 or 1. In practice, we truncate <italic>P</italic>(<italic>k</italic>) &#x3d; 1 to be 0.9 so that oTFisher_cct is dominated by small <italic>p</italic>-values and performs similarly to oTFisher_minp. Note that since <italic>P</italic>(<italic>k</italic>) depends on the LD (see its calculation below), the oTFisher implicitly accounts for the LD information.</p>
</sec>
<sec id="s2-2">
<title>2.2 SNP-screening procedures</title>
<p>We can utilize the oTFisher as a procedure to screen for important SNPs. The oTFisher procedure has three steps:<list list-type="simple">
<list-item>
<p>1) SNP-set testing: Identify the significantly associated SNP-sets by their oTFisher <italic>p</italic>-values <inline-formula id="inf4">
<mml:math id="m8">
<mml:mo>&#x2264;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>/</mml:mo>
<mml:mi>g</mml:mi>
</mml:math>
</inline-formula>, where <italic>&#x3b1;</italic> is the adjusted significance level, <italic>g</italic> is the number of SNP sets (e.g., genes) studied simultaneously.</p>
</list-item>
<list-item>
<p>2) Screening: From the <italic>t</italic> SNPs contained in the significant SNP sets, get <italic>s</italic> candidate SNPs with their <italic>p</italic>-value less than a threshold <italic>p</italic>
<sup>&#x22c6;</sup>.</p>
</list-item>
<list-item>
<p>3) Validation: Use an independent dataset to get new <italic>p</italic>-values of the <italic>s</italic> candidate SNPs. Get <italic>s</italic>
<sub>1</sub> validated SNPs with their <italic>p</italic>-values less than <italic>&#x3b1;</italic>/<italic>s</italic>.</p>
</list-item>
</list>
</p>
<p>A natural choice of the threshold is <italic>p</italic>
<sup>&#x22c6;</sup> &#x3d; <italic>&#x3c4;</italic>&#x2a;, where <inline-formula id="inf5">
<mml:math id="m9">
<mml:msup>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2261;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:msup>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> corresponding to the oTFisher_minp in <xref ref-type="disp-formula" rid="e2">(2)</xref> (i.e., <italic>P</italic> (<italic>k</italic>&#x2a;) is the minimal <italic>P</italic>(<italic>k</italic>)). Meanwhile, <italic>P</italic>(<italic>k</italic>) could have similar values over different <italic>k</italic>. To be conservative and reduce the false discoveries, we recommend <italic>p</italic>
<sup>&#x22c6;</sup> &#x3d; min{<italic>&#x3c4;</italic>&#x2a;, 0.1} (denoted by oTFisher_r as a restricted version).</p>
<p>In practice, SNP screening is commonly based on the Bonferroni procedure or the Benjamini&#x2013;Hochberg (BH) procedure:<list list-type="simple">
<list-item>
<p>&#x2022; Bonferroni procedure: The screened SNPs are those with their <italic>p</italic>-values less than <italic>p</italic>
<sup>&#x22c6;</sup> &#x3d; <italic>&#x3b1;</italic>/<italic>L</italic>, where <italic>L</italic> is the total number of SNPs.</p>
</list-item>
<list-item>
<p>&#x2022; BH procedure: The screened SNPs are those with <italic>p</italic>-value less than <inline-formula id="inf6">
<mml:math id="m10">
<mml:msup>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22c6;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22c6;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula>, where <italic>k</italic>
<sup>&#x22c6;</sup> is the largest <italic>k</italic> such that the ordered SNP <italic>p</italic>-values <italic>P</italic>
<sub>(<italic>k</italic>)</sub> &#x2264; <italic>&#x3b1;k</italic>/<italic>L</italic>.</p>
</list-item>
</list>
</p>
<p>The SNPs screened by Bonferroni and BH are validated in the same way as the validation stage for the oTFisher.</p>
<p>There are two potential benefits of utilizing the oTFisher procedure over Bonferroni and FDR procedures. First, as a set-testing method, the oTFisher can potentially increase the discovery of weakly associated SNPs. It is because the SNP-set test can detect the collective existence of weak genetic effects that are indistinguishable from individual SNPs (<xref ref-type="bibr" rid="B9">Donoho and Jin, 2004</xref>; <xref ref-type="bibr" rid="B54">Wu et al., 2014</xref>; <xref ref-type="bibr" rid="B21">Jin and Ke, 2016</xref>). Therefore, the oTFisher could better reveal SNPs with weak genetic effects than the Bonferroni and FDR procedures, which only rely on individual SNP tests. Second, <italic>&#x3c4;</italic>&#x2a; is influenced by the proportion of genetic signals (<xref ref-type="bibr" rid="B61">Zhang et al., 2020b</xref>). This information could also contribute to identifying important SNPs.</p>
</sec>
<sec id="s2-3">
<title>2.3 Algorithms for <italic>p</italic>-value calculation</title>
<p>Following the literature (<xref ref-type="bibr" rid="B3">Brown, 1975</xref>; <xref ref-type="bibr" rid="B63">Zhang and Wu, 2022b</xref>), we account for the dependence of SNP <italic>p</italic>-values by assuming that the vector of their z-score statistics <inline-formula id="inf7">
<mml:math id="m11">
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is approximately normal:<disp-formula id="e4">
<mml:math id="m12">
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3bc;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">&#x3a3;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(4)</label>
</disp-formula>where the mean vector <bold><italic>&#x3bc;</italic></bold> corresponds to the association hypotheses: <italic>H</italic>
<sub>0</sub>: <bold>
<italic>&#x3bc;</italic>
</bold> &#x3d; <bold>0</bold>, i.e., no SNPs are associated, and <italic>H</italic>
<sub>1</sub>: <bold>
<italic>&#x3bc;</italic>
</bold> &#x2260; <bold>0</bold>, i.e., at least one SNP is associated. The correlation matrix <bold>&#x3a3;</bold> is assumed to be estimable but otherwise arbitrary. These assumptions are reasonably satisfied in practice when the sample size is reasonably large (e.g., by the linear model-based association tests (<xref ref-type="bibr" rid="B44">Shao, 2010</xref>)). As one example, the estimation of <bold>&#x3a3;</bold> among the marginal score statistics is given in Section 3.1. For analyzing GWAS summary data where the individual-level genotype data are unavailable, <bold>&#x3a3;</bold> can often be estimated by the LD matrix based on reference genome panel data, such as the 1000 Genomes and the UK10K projects (<xref ref-type="bibr" rid="B20">Hu et al., 2013</xref>). Although most GWAS summary data contain two-sided <italic>p</italic>-values, we also allow them to be one-sided for completeness of the statistical development:<disp-formula id="e5">
<mml:math id="m13">
<mml:mtext>&#x2009;One</mml:mtext>
<mml:mo>-</mml:mo>
<mml:mtext>sided:&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>;</mml:mo>
<mml:mspace width="1em"/>
<mml:mtext>&#x2009;Two</mml:mtext>
<mml:mo>-</mml:mo>
<mml:mtext>sided:&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf8">
<mml:math id="m14">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="double-struck">P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0,1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes the survival function of <italic>N</italic> (0, 1).</p>
</sec>
<sec id="s2-4">
<title>2.3.1 <italic>p</italic>-value calculation for TFisher</title>
<p>At given <italic>&#x3c4;</italic>
<sub>1</sub>, <italic>&#x3c4;</italic>
<sub>2</sub> and <italic>n</italic>, the TFisher statistic <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1</sub>, <italic>&#x3c4;</italic>
<sub>2</sub>) in <xref ref-type="disp-formula" rid="e1">Eq. 1</xref> has a point probability mass at 0: <inline-formula id="inf9">
<mml:math id="m15">
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="double-struck">P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="double-struck">P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3e;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, corresponding to the event that all <italic>p</italic>-values are truncated. Define <italic>s</italic> &#x3d; &#x2212;2&#x2009;log (<italic>&#x3c4;</italic>
<sub>1</sub>/<italic>&#x3c4;</italic>
<sub>2</sub>) &#x2265; 0 when <italic>&#x3c4;</italic>
<sub>2</sub> &#x2265; <italic>&#x3c4;</italic>
<sub>1</sub>, and <italic>s</italic> &#x3d; &#x2212;2<italic>n</italic>&#x2009;log (<italic>&#x3c4;</italic>
<sub>1</sub>/<italic>&#x3c4;</italic>
<sub>2</sub>) &#x3c; 0 when <italic>&#x3c4;</italic>
<sub>2</sub> &#x3c; <italic>&#x3c4;</italic>
<sub>1</sub>. In either case <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1</sub>, <italic>&#x3c4;</italic>
<sub>2</sub>) &#x2265; <italic>s</italic> and its distribution is a mixture of point mass at 0 and a continuous distribution defined in [<italic>s</italic>, <italic>&#x221e;</italic>). That is, <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1</sub>, <italic>&#x3c4;</italic>
<sub>2</sub>) &#x223c; <italic>p</italic>
<sub>0</sub> &#x22c5; 0 &#x2b; (1 &#x2212; <italic>p</italic>
<sub>0</sub>) &#x22c5; <italic>T</italic>&#x2032;, where <italic>T</italic>&#x2032; denotes an appropriate continuous random variable.</p>
<p>The exact value of <italic>p</italic>
<sub>0</sub> is easy to calculate under the normality assumption in <xref ref-type="disp-formula" rid="e4">Eq. 4</xref>. The exact distribution of <italic>T</italic>&#x2032; is challenging to obtain. We propose to use the gamma distribution model to approximate it for a few reasons. First, the model is consistent with the distribution of the TFisher under independence, which is a weighted gamma distribution (<xref ref-type="bibr" rid="B61">Zhang et al., 2020b</xref>). Second, the gamma distribution has been used in the literature to approximate Fisher&#x2019;s method under dependence (<xref ref-type="bibr" rid="B3">Brown, 1975</xref>; <xref ref-type="bibr" rid="B63">Zhang and Wu, 2022b</xref>). Third, when the shape parameter of the gamma distribution is large, the distribution approaches the normal distribution, which is appropriate for the TFisher statistic when <italic>n</italic> is large (see details below). Overall, the gamma distribution provides a flexible and straightforward distribution model, which is vital for computational speed and accuracy.</p>
<p>Specifically, we approximate the distribution of the TFisher statistic by<disp-formula id="e6">
<mml:math id="m16">
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mrow>
<mml:mover>
<mml:mrow>
<mml:mo>&#x2248;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mover>
</mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:math>
<label>(6)</label>
</disp-formula>where <italic>X</italic>&#x2032; &#x2212; <italic>b</italic> &#x223c;&#x393;(<italic>a</italic>, <italic>&#x3b8;</italic>), the gamma distribution with shape parameter <italic>a</italic> and scale parameter <italic>&#x3b8;</italic>. We consider a shift parameter <italic>b</italic> so that <italic>X</italic>&#x2032; &#x2208; [<italic>b</italic>, <italic>&#x221e;</italic>). Let <italic>F</italic>
<sub>&#x393;(<italic>a</italic>,<italic>&#x3b8;</italic>)</sub>(<italic>x</italic>) denote the CDF of &#x393;(<italic>a</italic>, <italic>&#x3b8;</italic>). The CDF of <italic>X</italic> is<disp-formula id="e7">
<mml:math id="m17">
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="double-struck">P</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mi>I</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(7)</label>
</disp-formula>Based on <xref ref-type="disp-formula" rid="e7">Eq. 7</xref>, the <italic>p</italic>-value of the TFisher at an observed statistic <italic>t</italic> is<disp-formula id="e8">
<mml:math id="m18">
<mml:mi mathvariant="double-struck">P</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2248;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(8)</label>
</disp-formula>We discuss the methods to calculate <italic>F</italic>
<sub>
<italic>X</italic>
</sub>(<italic>t</italic>) in the following.</p>
<p>Method 1: Generalized Brown&#x2019;s method (GB). This method follows the essential idea of Brown&#x2019;s method (<xref ref-type="bibr" rid="B3">Brown, 1975</xref>) to match the first two moments of <italic>T</italic> and <italic>X</italic>. Specifically, we set the shift parameter <italic>b</italic> &#x3d; <italic>s</italic> so that <italic>X</italic>&#x2032; and <italic>T</italic>&#x2032; have the same domain. The parameters <italic>a</italic> and <italic>&#x3b8;</italic> are determined by matching the means and the variances of <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1</sub>, <italic>&#x3c4;</italic>
<sub>2</sub>) and <italic>X</italic>. Denote <italic>&#x3bc;</italic>
<sub>
<italic>T</italic>
</sub> &#x3d; E (<italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1</sub>, <italic>&#x3c4;</italic>
<sub>2</sub>)) and <inline-formula id="inf10">
<mml:math id="m19">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">V</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. We have<disp-formula id="equ2">
<mml:math id="m20">
<mml:mfenced open="{" close="">
<mml:mrow>
<mml:mtable class="cases">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mspace width="1em"/>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mspace width="1em"/>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x21d2;</mml:mo>
<mml:mfenced open="{" close="">
<mml:mrow>
<mml:mtable class="cases">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mi>a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>b</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
<mml:mspace width="1em"/>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>b</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>Note that the gamma approximation is consistent with the asymptotic normal distribution of <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1</sub>, <italic>&#x3c4;</italic>
<sub>2</sub>) for large <italic>n</italic> by the Central Limit Theorem (CLT). Specifically, when <italic>n</italic> &#x2192; <italic>&#x221e;</italic>, <italic>p</italic>
<sub>0</sub> &#x2192; 0, so <italic>&#x3bc;</italic>
<sub>
<italic>T</italic>
</sub> &#x3d; <italic>&#x3bc;</italic>
<sub>
<italic>X</italic>
</sub> &#x2248; <italic>a&#x3b8;</italic> &#x2b; <italic>b</italic> and <inline-formula id="inf11">
<mml:math id="m21">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2248;</mml:mo>
<mml:mi>a</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>. Because &#x393;(<italic>a</italic>, <italic>&#x3b8;</italic>) &#x2248; <italic>N</italic> (<italic>a&#x3b8;</italic>, <italic>a&#x3b8;</italic>
<sup>2</sup>) for large <italic>a</italic>, the distribution model in <xref ref-type="disp-formula" rid="e6">Eq. 6</xref> leads to <inline-formula id="inf12">
<mml:math id="m22">
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mover>
<mml:mrow>
<mml:mo>&#x2248;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mover>
</mml:mrow>
<mml:mi mathvariant="normal">&#x393;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo>&#x2248;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2248;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. However, for finite <italic>n</italic>, the distribution model in <xref ref-type="disp-formula" rid="e6">Eq. 6</xref> is more accurate for <italic>p</italic>-value calculation.</p>
<p>Straightforward calculation gives <italic>&#x3bc;</italic>
<sub>
<italic>T</italic>
</sub> &#x3d; 2<italic>n&#x3c4;</italic>
<sub>1</sub> (1 &#x2212; log&#x2009; <italic>&#x3c4;</italic>
<sub>1</sub> &#x2b; log&#x2009; <italic>&#x3c4;</italic>
<sub>2</sub>). For the variance <inline-formula id="inf13">
<mml:math id="m23">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, we derive an analytical formula, given in Lemma 1 of the Supplementary Material. The formula involves an infinite summation. However, in practice, summing only two or three terms over <italic>k</italic> gives sufficient accuracy for <inline-formula id="inf14">
<mml:math id="m24">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> (<xref ref-type="bibr" rid="B63">Zhang and Wu, 2022b</xref>). The proof is based on Mehler&#x2019;s theorem (<xref ref-type="bibr" rid="B42">Patel and Read, 1996</xref>) and is given in Supplementary Material.</p>
<p>Method 2: Skewness-kurtosis-ratio matching method (SKRM). Accurate calculation of small <italic>p</italic>-values highly depends on the precise approximation of the right tail of the null distribution. In this method, we do not require the shift parameter <italic>b</italic> &#x3d; <italic>s</italic> but treat it as an additional degree of freedom to capture the right-tail information of the TFisher statistic. That is, in addition to the first two moments, we further match the skewness-kurtosis ratios of <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1</sub>, <italic>&#x3c4;</italic>
<sub>2</sub>) and <italic>X</italic>. Engaging higher-order moments could provide more flexibility in the distribution and thus improve the accuracy of <italic>p</italic>-value calculation. In particular, matching the skewness-kurtosis ratios is a cost-efficient method&#x2014;it captures two higher moments using only one extra parameter.</p>
<p>Specifically, let <inline-formula id="inf15">
<mml:math id="m25">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>E</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>/</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and <inline-formula id="inf16">
<mml:math id="m26">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3ba;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>E</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>/</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> be the skewness and kurtosis of <italic>T</italic>&#x2032;, and <italic>&#x3b3;</italic>
<sub>
<italic>X</italic>&#x2032;</sub> and <italic>&#x3ba;</italic>
<sub>
<italic>X</italic>&#x2032;</sub> be the skewness and kurtosis of <italic>X</italic>&#x2032;, respectively. By matching the ratio between skewness and excess kurtosis<disp-formula id="equ3">
<mml:math id="m27">
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3ba;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3ba;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:math>
</disp-formula>we can obtain a simple closed form <inline-formula id="inf17">
<mml:math id="m28">
<mml:mi>a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>9</mml:mn>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3ba;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:math>
</inline-formula>. Subsequently, by matching the mean and variance <italic>&#x3bc;</italic>
<sub>
<italic>T</italic>&#x2032;</sub> &#x3d; <italic>&#x3bc;</italic>
<sub>
<italic>X</italic>&#x2032;</sub> and <inline-formula id="inf18">
<mml:math id="m29">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, we have <inline-formula id="inf19">
<mml:math id="m30">
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:math>
</inline-formula> and <inline-formula id="inf20">
<mml:math id="m31">
<mml:mi>b</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:msqrt>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msqrt>
</mml:math>
</inline-formula>. After <italic>a</italic>, <italic>&#x3b8;</italic> and <italic>b</italic> are determined, the <italic>p</italic>-value of the TFisher can be calculated by <xref ref-type="disp-formula" rid="e7">Eqs 7</xref>, <xref ref-type="disp-formula" rid="e8">8</xref>.</p>
<p>Exact calculation of <italic>&#x3b3;</italic>
<sub>
<italic>T</italic>&#x2032;</sub> and <italic>&#x3ba;</italic>
<sub>
<italic>T</italic>&#x2032;</sub> would be intricate due to the complexity of the higher-order moments of the summands in the TFisher statistic. We rely on simulation based on <xref ref-type="disp-formula" rid="e4">Eq. 4</xref> to obtain these values. The number of simulations needed for estimating these parameters is much smaller than that required for obtaining a small empirical <italic>p</italic>-value directly. Therefore, the SKRM is still computationally more efficient than getting <italic>p</italic>-values solely based on the re-sampling strategy.</p>
<p>Method 3: Hybrid method. To balance computational speed and accuracy, we recommend a simple two-stage calculation of TFisher&#x2019;s <italic>p</italic>-value. Because the GB is fast and accurate for controlling the type I error rate at <italic>&#x3b1;</italic> &#x2265; 0.01 (see the numerical results below), the GB is applied in the first stage. If the obtained <italic>p</italic>-value is less than 0.01 (the chance is about 1% under the null), the SKRM method will obtain the final <italic>p</italic>-value in the second stage. With a single core of 2.80&#xa0;GHz AMD EPYC 7543 CPU and 20G memory, the computation times for calculating TFisher&#x2019;s <italic>p</italic>-values for SNP sets of 30/50/100/200 SNPs are about 0.07/0.13/0.30/0.74&#xa0;s by the GB method (implemented in R version 4.2.0). Correspondingly, the expected times by the hybrid method (assuming 1% chance of engaging the SKRM method that takes 10<sup>5</sup> simulations to obtain <italic>&#x3b3;</italic>
<sub>
<italic>T</italic>&#x2032;</sub> and <italic>&#x3ba;</italic>
<sub>
<italic>T</italic>&#x2032;</sub> values) are 0.09/0.15/0.33/0.78&#xa0;s.</p>
</sec>
<sec id="s2-5">
<title>2.3.2 <italic>p</italic>-value calculation for oTFisher</title>
<p>For the oTFisher_minp in <xref ref-type="disp-formula" rid="e2">Eq. 2</xref>, we can apply asymptotic distribution to approximate its <italic>p</italic>-value by<disp-formula id="e9">
<mml:math id="m32">
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">R</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(9)</label>
</disp-formula>where <italic>p</italic>
<sub>0</sub> is the observed statistic of oTFisher_minp, and &#x3a6;<sub>
<bold>R</bold>
</sub> denotes the CDF of a multivariate normal distribution with mean zero and correlation matrix <bold>R</bold>. We obtain <bold>R</bold> by scaling <bold>&#x3a9;</bold>, the covariance matrix of <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1<italic>k</italic>
</sub>, <italic>&#x3c4;</italic>
<sub>2<italic>k</italic>
</sub>), <italic>k</italic> &#x3d; 1, &#x2026; , <italic>K</italic>, given by Lemma 2 in Supplementary Material. That is, <bold>R</bold> &#x3d; <bold>&#x39b;&#x3a9;&#x39b;</bold> with the diagonal matrix <inline-formula id="inf21">
<mml:math id="m33">
<mml:mi mathvariant="bold">&#x39b;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>diag</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msqrt>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>As for oTFisher_cct in <xref ref-type="disp-formula" rid="e3">Eq. 3</xref>, following the property of the CCT, its distribution is robust to the correlations as long as <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1<italic>k</italic>
</sub>, <italic>&#x3c4;</italic>
<sub>2<italic>k</italic>
</sub>)&#x2019;s are roughly normally distributed. This requirement is justifiable because <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1<italic>k</italic>
</sub>, <italic>&#x3c4;</italic>
<sub>2<italic>k</italic>
</sub>) is in the form of a summation so that it is roughly normally distributed by the CLT when <italic>n</italic> is moderately large and <italic>&#x3c4;</italic>
<sub>1</sub> is not too small. Denote the observed statistic by <italic>cct</italic>
<sub>
<italic>o</italic>
</sub>. We directly apply the result by (<xref ref-type="bibr" rid="B33">Liu and Xie, 2020</xref>) to approximate its test <italic>p</italic>-value by <inline-formula id="inf22">
<mml:math id="m34">
<mml:mi mathvariant="double-struck">P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>c</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>c</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2248;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>tan</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>c</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
</mml:math>
</inline-formula>. For the computation time of obtaining the oTFisher&#x2019;s <italic>p</italic>-value, the dominant part is to get the <italic>p</italic>-values of the TFisher statistics <italic>T</italic>
<sub>
<italic>n</italic>
</sub> (<italic>&#x3c4;</italic>
<sub>1<italic>k</italic>
</sub>, <italic>&#x3c4;</italic>
<sub>2<italic>k</italic>
</sub>), <italic>k</italic> &#x3d; 1, &#x2026; , <italic>K</italic>.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Simulation studies</title>
<sec id="s3-1">
<title>3.1 Simulation design</title>
<p>Simulations were applied to verify the accuracy of the <italic>p</italic>-value calculation, statistical power, and SNP-screening performance of the oTFisher procedures. The genotype data were generated by the Cosi2 package (<xref ref-type="bibr" rid="B45">Shlyakhter et al., 2014</xref>). Specifically, 1,290 haplotypes were generated according to a coalescent model based on chromosome 1 of the European population. Two haplotypes were randomly picked with replacement to form the genotypes of one diploid individual. In each simulation, we obtained SNPs of <italic>N</italic> individuals. Both rare variants (0.05%<inline-formula id="inf23">
<mml:math id="m35">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula>MAF<inline-formula id="inf24">
<mml:math id="m36">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula>5%) and common variants (MAF &#x2265;5%) were considered.</p>
<p>We simulated continuous and binary traits by the linear regression model and the logit model, respectively:<disp-formula id="e10">
<mml:math id="m37">
<mml:mtable class="gathered">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold-italic">&#x3b3;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;where&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mover>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:mover>
</mml:mrow>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mtext>logit</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="double-struck">P</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold-italic">&#x3b3;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(10)</label>
</disp-formula>where <italic>Y</italic>
<sub>
<italic>k</italic>
</sub> quantifies the phenotypic trait of the <italic>k</italic>th subject, <italic>k</italic> &#x3d; 1, &#x2026; , <italic>N</italic>, with the sample size <italic>N</italic>. <inline-formula id="inf25">
<mml:math id="m38">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the genotype vector of <italic>n</italic> SNPs, <inline-formula id="inf26">
<mml:math id="m39">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the vector of <italic>l</italic> controlling covariates. The nonzero elements of <inline-formula id="inf27">
<mml:math id="m40">
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> are the causal genetic effects of the corresponding SNPs. The SNP-set analysis concerns testing the global hypotheses<disp-formula id="equ4">
<mml:math id="m41">
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mtext>&#x2009;versus&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3b2;</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>.</mml:mo>
</mml:math>
</disp-formula>We mimic a balanced case-control study for the binary traits. That is, a large number of outcomes were generated based on the probability of the logit model, then we randomly selected subjects so that the numbers of cases and controls are <italic>N</italic>/2 each.</p>
<p>Based on the simulated data, we calculate the marginal score test statistic following literature (<xref ref-type="bibr" rid="B34">McCullagh and Nelder, 1989</xref>; <xref ref-type="bibr" rid="B43">Schaid et al., 2002</xref>; <xref ref-type="bibr" rid="B1">Barnett et al., 2017</xref>). Specifically, the score of the <italic>i</italic>th SNP is <inline-formula id="inf28">
<mml:math id="m42">
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
</mml:math>
</inline-formula> where <inline-formula id="inf29">
<mml:math id="m43">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> are the fitted trait values by the maximum likelihood estimation under <italic>H</italic>
<sub>0</sub>. It can be shown that under <italic>H</italic>
<sub>0</sub>, <inline-formula id="inf30">
<mml:math id="m44">
<mml:mi mathvariant="bold">M</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mover>
<mml:mrow>
<mml:mo>&#x2192;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mover>
</mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">&#x3a3;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:math>
</inline-formula> as <italic>N</italic> &#x2192; <italic>&#x221e;</italic>. The covariance matrix <bold>&#x3a3;</bold> can be estimated by <inline-formula id="inf31">
<mml:math id="m45">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a3;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mi mathvariant="bold">X</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mo>,</mml:mo>
</mml:math>
</inline-formula> where (<bold>G</bold>, <bold>X</bold>) is the design matrix corresponding to (10), and <bold>W</bold> is a diagonal matrix: <inline-formula id="inf32">
<mml:math id="m46">
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="bold">I</mml:mi>
</mml:math>
</inline-formula> for continuous trait (where <inline-formula id="inf33">
<mml:math id="m47">
<mml:msup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the estimate of the residual variance); <inline-formula id="inf34">
<mml:math id="m48">
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>diag</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> for binary trait. Each <italic>M</italic>
<sub>
<italic>i</italic>
</sub> is standardized to get the marginal score statistic <inline-formula id="inf35">
<mml:math id="m49">
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a3;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msqrt>
<mml:mrow>
<mml:mover>
<mml:mrow>
<mml:mo>&#x2192;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mover>
</mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> as <italic>N</italic> &#x2192; <italic>&#x221e;</italic> under <italic>H</italic>
<sub>0</sub>. The correlation matrix of <inline-formula id="inf36">
<mml:math id="m50">
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is estimated by <inline-formula id="inf37">
<mml:math id="m51">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">D</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a3;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">D</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf38">
<mml:math id="m52">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">D</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>diag</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a3;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msqrt>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Thus, the marginal score statistics satisfy the assumption (4) asymptotically. We used the two-sided SNP <italic>p</italic>-values in <xref ref-type="disp-formula" rid="e5">Eq. 5</xref>. For the rare-variant analysis of binary traits, the saddle point approximation (SPA) was applied to obtain SNP <italic>p</italic>-values, which corrects the bias due to the unbalanced distribution of rare variants&#x2019; genotype data (<xref ref-type="bibr" rid="B8">Dey et al., 2017</xref>).</p>
</sec>
<sec id="s3-2">
<title>3.2 Accuracy of <italic>p</italic>-value calculation</title>
<p>Under the null, quantitative and binary trait values were generated by setting <bold>
<italic>&#x3b2;</italic>
</bold> &#x3d; <bold>0</bold> and <bold>X</bold> &#x3d; <bold>1</bold> in <xref ref-type="disp-formula" rid="e10">Eq. 10</xref>. We simulated 10<sup>7</sup> oTFisher statistics with the adapting domain <inline-formula id="inf39">
<mml:math id="m53">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>0.001</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.005</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.01</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.05</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.5</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.7</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. The simulations included 100 randomly generated genotype data sets to mimic various minor allele frequencies and LD structures of SNPs in various genes. The empirical type I error rate was computed as the proportion of calculated <italic>p</italic>-values smaller than a given nominal level <italic>&#x3b1;</italic>. <xref ref-type="table" rid="T1">Table 1</xref> lists the ratios of empirical type I error rates to the nominal <italic>&#x3b1;</italic> levels under common and rare variants for quantitative and binary traits. A ratio around 1 indicates accurate calculation. A more stringent <italic>&#x3b1;</italic> is harder to control. The GB method controls the type I error rate well for <italic>&#x3b1;</italic> &#x2265; 0.005, but becomes liberal at smaller <italic>&#x3b1;</italic>, where the SKRM method (using 10<sup>5</sup> simulations to obtain the third and fourth moments) controls the error much better. Therefore, the hybrid method combining the GB and the SKRM balances accuracy and computational speed. Consistent simulation results with various <italic>n</italic> and <inline-formula id="inf40">
<mml:math id="m54">
<mml:mi mathvariant="script">T</mml:mi>
</mml:math>
</inline-formula> settings (with starting <italic>&#x3c4;</italic> &#x3d; 10<sup>&#x2013;5</sup>, 10<sup>&#x2013;3</sup> or 10<sup>&#x2013;2</sup> in <inline-formula id="inf41">
<mml:math id="m55">
<mml:mi mathvariant="script">T</mml:mi>
</mml:math>
</inline-formula>) are given in <xref ref-type="sec" rid="s12">Supplementary Tables S1&#x2013;S4</xref> in Supplementary Material.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Type I error control for SNP-set testing by the oTFisher under quantitative and binary traits. SNP-set size <italic>n</italic> &#x3d; 100, sample size <italic>N</italic> &#x3d; 1,000.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="3" align="left">
<italic>&#x3b1;</italic>
</th>
<th colspan="4" align="left">rare variants</th>
<th colspan="4" align="left">common variants</th>
</tr>
<tr>
<th colspan="2" align="left">oTFisher_cct</th>
<th colspan="2" align="left">oTFisher_minp</th>
<th colspan="2" align="left">oTFisher_cct</th>
<th colspan="2" align="left">oTFisher_minp</th>
</tr>
<tr>
<th align="left">GB</th>
<th align="left">Hybrid</th>
<th align="left">GB</th>
<th align="left">Hybrid</th>
<th align="left">GB</th>
<th align="left">Hybrid</th>
<th align="left">GB</th>
<th align="left">Hybrid</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="9" align="left">Continuous trait</td>
</tr>
<tr>
<td align="left">0.1</td>
<td align="left">0.93</td>
<td align="left">0.94</td>
<td align="left">0.84</td>
<td align="left">0.85</td>
<td align="left">0.89</td>
<td align="left">0.90</td>
<td align="left">0.83</td>
<td align="left">0.83</td>
</tr>
<tr>
<td align="left">0.05</td>
<td align="left">0.98</td>
<td align="left">0.99</td>
<td align="left">0.80</td>
<td align="left">0.81</td>
<td align="left">0.95</td>
<td align="left">0.96</td>
<td align="left">0.77</td>
<td align="left">0.77</td>
</tr>
<tr>
<td align="left">0.01</td>
<td align="left">1.15</td>
<td align="left">0.90</td>
<td align="left">0.88</td>
<td align="left">0.64</td>
<td align="left">1.13</td>
<td align="left">0.94</td>
<td align="left">0.84</td>
<td align="left">0.64</td>
</tr>
<tr>
<td align="left">0.005</td>
<td align="left">1.27</td>
<td align="left">0.86</td>
<td align="left">1.00</td>
<td align="left">0.57</td>
<td align="left">1.28</td>
<td align="left">0.90</td>
<td align="left">0.97</td>
<td align="left">0.56</td>
</tr>
<tr>
<td align="left">0.001</td>
<td align="left">1.80</td>
<td align="left">0.87</td>
<td align="left">1.48</td>
<td align="left">0.56</td>
<td align="left">1.82</td>
<td align="left">0.89</td>
<td align="left">1.50</td>
<td align="left">0.52</td>
</tr>
<tr>
<td align="left">0.0005</td>
<td align="left">2.17</td>
<td align="left">0.89</td>
<td align="left">1.84</td>
<td align="left">0.58</td>
<td align="left">2.21</td>
<td align="left">0.91</td>
<td align="left">1.89</td>
<td align="left">0.53</td>
</tr>
<tr>
<td align="left">0.0001</td>
<td align="left">3.69</td>
<td align="left">0.95</td>
<td align="left">3.61</td>
<td align="left">0.70</td>
<td align="left">3.66</td>
<td align="left">1.00</td>
<td align="left">3.65</td>
<td align="left">0.73</td>
</tr>
<tr>
<td align="left">0.00005</td>
<td align="left">4.82</td>
<td align="left">1.05</td>
<td align="left">4.99</td>
<td align="left">0.87</td>
<td align="left">4.66</td>
<td align="left">1.08</td>
<td align="left">4.96</td>
<td align="left">0.79</td>
</tr>
<tr>
<td align="left">0.00001</td>
<td align="left">9.54</td>
<td align="left">1.11</td>
<td align="left">11.00</td>
<td align="left">1.06</td>
<td align="left">8.89</td>
<td align="left">1.25</td>
<td align="left">11.33</td>
<td align="left">1.29</td>
</tr>
<tr>
<td colspan="9" align="left">Binary trait</td>
</tr>
<tr>
<td align="left">0.1</td>
<td align="left">0.93</td>
<td align="left">0.93</td>
<td align="left">0.85</td>
<td align="left">0.85</td>
<td align="left">0.88</td>
<td align="left">0.88</td>
<td align="left">0.82</td>
<td align="left">0.82</td>
</tr>
<tr>
<td align="left">0.05</td>
<td align="left">0.97</td>
<td align="left">0.98</td>
<td align="left">0.81</td>
<td align="left">0.81</td>
<td align="left">0.94</td>
<td align="left">0.94</td>
<td align="left">0.76</td>
<td align="left">0.76</td>
</tr>
<tr>
<td align="left">0.01</td>
<td align="left">1.10</td>
<td align="left">0.86</td>
<td align="left">0.85</td>
<td align="left">0.60</td>
<td align="left">1.10</td>
<td align="left">0.90</td>
<td align="left">0.82</td>
<td align="left">0.61</td>
</tr>
<tr>
<td align="left">0.005</td>
<td align="left">1.22</td>
<td align="left">0.81</td>
<td align="left">0.94</td>
<td align="left">0.53</td>
<td align="left">1.23</td>
<td align="left">0.84</td>
<td align="left">0.93</td>
<td align="left">0.52</td>
</tr>
<tr>
<td align="left">0.001</td>
<td align="left">1.71</td>
<td align="left">0.75</td>
<td align="left">1.39</td>
<td align="left">0.48</td>
<td align="left">1.75</td>
<td align="left">0.80</td>
<td align="left">1.44</td>
<td align="left">0.46</td>
</tr>
<tr>
<td align="left">0.0005</td>
<td align="left">2.07</td>
<td align="left">0.77</td>
<td align="left">1.75</td>
<td align="left">0.50</td>
<td align="left">2.11</td>
<td align="left">0.81</td>
<td align="left">1.82</td>
<td align="left">0.47</td>
</tr>
<tr>
<td align="left">0.0001</td>
<td align="left">3.50</td>
<td align="left">0.87</td>
<td align="left">3.38</td>
<td align="left">0.61</td>
<td align="left">3.52</td>
<td align="left">0.85</td>
<td align="left">3.54</td>
<td align="left">0.63</td>
</tr>
<tr>
<td align="left">0.00005</td>
<td align="left">4.59</td>
<td align="left">0.86</td>
<td align="left">4.68</td>
<td align="left">0.67</td>
<td align="left">4.53</td>
<td align="left">0.93</td>
<td align="left">4.82</td>
<td align="left">0.71</td>
</tr>
<tr>
<td align="left">0.00001</td>
<td align="left">8.94</td>
<td align="left">1.09</td>
<td align="left">10.32</td>
<td align="left">0.98</td>
<td align="left">8.70</td>
<td align="left">1.16</td>
<td align="left">11.09</td>
<td align="left">1.02</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-3">
<title>3.3 Statistical power</title>
<p>Through simulations, we assessed the statistical power of the oTFisher tests in comparison with other <italic>p</italic>-value combination tests in GWAS summary data analysis: the GATES (extended Simes procedure, using GATES2 function in R library aSPU (<xref ref-type="bibr" rid="B24">Li et al., 2011</xref>)), the CCT (i.e., the ACAT with equal weights), Fisher&#x2019;s method, the soft-thresholding TFisher at fixed <italic>&#x3c4;</italic>
<sub>1</sub> &#x3d; <italic>&#x3c4;</italic>
<sub>2</sub> &#x3d; 0.05, the adaptive TPM (ATPM), and the adaptive RTP (ARTP). The ATPM follows (1) and (2) with fixed <italic>&#x3c4;</italic>
<sub>2</sub> &#x3d; 1, and adapts over <inline-formula id="inf42">
<mml:math id="m56">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">T</mml:mi>
</mml:math>
</inline-formula>. The ARTP adapts over <italic>&#x3c4;</italic>
<sub>1</sub> &#x2208; {<italic>P</italic>
<sub>(<italic>k</italic>)</sub>}, where <inline-formula id="inf43">
<mml:math id="m57">
<mml:mi>k</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi mathvariant="script">T</mml:mi>
</mml:math>
</inline-formula> (rounding to the nearest integers) to be consistent with the oTFisher and the ATPM.</p>
<p>We considered that the causal SNPs were randomly located, and their effect sizes were given by the nonzero elements of <bold>
<italic>&#x3b2;</italic>
</bold> in <xref ref-type="disp-formula" rid="e10">Eq. 10</xref>. For a fair comparison, we empirically controlled the type I error rate <italic>&#x3b1;</italic> to avoid potentially unavailable or inaccurate <italic>p</italic>-value calculation for some tests. For example, there are no <italic>p</italic>-value calculation methods for the RTP and the ARTP under dependence. We obtained the critical value of each statistic as the upper 100<italic>&#x3b1;%</italic> percentile of its values generated from 10,000 simulations under the null. The statistical power was obtained as the percentage surpassing the critical value among the statistics generated from 1,000 simulations under the alternative. For the RTP and ARTP, we applied a one-level simulation algorithm consistent with literature (<xref ref-type="bibr" rid="B56">Yu et al., 2009</xref>) except that we directly simulated the Z-scores for faster computation instead of permuting the genotype data (for details, see <xref ref-type="sec" rid="s12">Supplementary Material</xref>).</p>
<p>Statistical power was systematically studied under various settings regarding trait type (quantitative or binary), SNP type (common or rare), the number of causal SNPs <italic>m</italic>, genetic effect size <italic>&#x3b2;</italic>, SNP-set size <italic>n</italic>, sample size <italic>N</italic>, and type I error rate <italic>&#x3b1;</italic>. <xref ref-type="fig" rid="F1">Figure 1</xref> shows the power comparison under binary traits from common SNPs at <italic>&#x3b1;</italic> &#x3d; 0.005 with <italic>n</italic> &#x3d; 100 and <italic>N</italic> &#x3d; 10,000. A few interesting observations can be made. First, the GATES and the CCT have similar performances. They are advantageous when causal SNPs are sparse (e.g., <italic>m</italic> &#x2264; 3) and their effects are strong. Fisher&#x2019;s method shows an opposite pattern&#x2014;it is preferred if causal SNPs are dense, especially when effects are weak. These patterns are consistent with literature results (<xref ref-type="bibr" rid="B62">Zhang and Wu, 2022a</xref>). TFisher at fixed <italic>&#x3c4;</italic>
<sub>1</sub> &#x3d; <italic>&#x3c4;</italic>
<sub>2</sub> &#x3d; 0.05 is more robust over sparse and dense causal SNPs. However, it could still be less satisfactory (e.g., when <italic>m</italic> &#x2264; 3). The oTFisher_cct and oTFisher_minp are similar; their power is the best in most scenarios, showing an overall advantage over unknown genetic architectures. In comparison with the ATPM and the ARTP, <xref ref-type="fig" rid="F2">Figure 2</xref> shows that the oTFisher is uniformly better than the ATPM. This observation is well-supported by a theoretical optimality study (<xref ref-type="bibr" rid="B61">Zhang et al., 2020b</xref>). The ARTP and the oTFisher have very similar power. Meanwhile, our <italic>p</italic>-value calculation algorithm provides a practical advantage for applying the oTFisher over the ARTP in computation. These comparison patterns remain similar for quantitative traits, rare variants, and different <italic>&#x3b1;</italic> levels. Comparisons under other settings are given in <xref ref-type="sec" rid="s12">Supplementary Figures S1&#x2013;S13</xref> in Supplementary Material.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Statistical power for binary traits from common SNPs. Row 1: Fixing the number of causal SNPs <italic>m</italic> &#x3d; 1, 5, and 10, and varying effect size <italic>&#x3b2;</italic> on <italic>x</italic>-axis. Row 2: Fixing <italic>&#x3b2;</italic> &#x3d; 0.06, 0.08, and 0.1, and varying <italic>m</italic> on <italic>x</italic>-axis. Testing methods: fisher: Fisher&#x2019;s method; soft: soft-thresholding TFisher with <italic>&#x3c4;</italic>
<sub>1</sub> &#x3d; <italic>&#x3c4;</italic>
<sub>2</sub> &#x3d; 0.05; cct: Cauchy combination test; otfisher_minp: oTFisher in <xref ref-type="disp-formula" rid="e2">Eq. 2</xref>; otfisher_cct: oTFisher in <xref ref-type="disp-formula" rid="e3">Eq. 3</xref>; gates: extended Simes procedure.</p>
</caption>
<graphic xlink:href="fgene-13-1009428-g001.tif"/>
</fig>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Power comparison among the data-adaptive tests. Quantitative traits from rare variants. <italic>&#x3b1;</italic> &#x3d; 0.01. Fixing the effect size to be 0.12 (left panel) and 0.14 (right). <italic>X</italic>-axis: The number of causal SNPs. Testing methods: fisher: Fisher&#x2019;s method; soft: soft-thresholding TFisher with <italic>&#x3c4;</italic>
<sub>1</sub> &#x3d; <italic>&#x3c4;</italic>
<sub>2</sub> &#x3d; 0.05; cct: Cauchy combination test; otfisher_minp: oTFisher in <xref ref-type="disp-formula" rid="e2">Eq. 2</xref>; otfisher_cct: oTFisher in <xref ref-type="disp-formula" rid="e3">Eq. 3</xref>; gates: extended Simes procedure; ATPM: adaptive TPM; ARTP: adaptive RTP.</p>
</caption>
<graphic xlink:href="fgene-13-1009428-g002.tif"/>
</fig>
</sec>
<sec id="s3-4">
<title>3.4 SNP screening</title>
<p>We studied the performance of SNP-screening procedures measured by the accuracy of detecting causal SNPs. Because non-causal SNPs in LD with causal SNPs also show statistical associations, the study focused on rare-variant analysis with weak LDs for simplicity. To mimic a gene-based SNP-set analysis, we simulated <italic>L</italic> &#x3d; 1,000 SNPs (with the LD <italic>r</italic>
<sup>2</sup> &#x3c; 0.3) in <italic>g</italic> &#x3d; 10 genes of equal size. Two causal genes contained causal SNPs with random locations. The continuous and binary traits were obtained using models in <xref ref-type="disp-formula" rid="e10">Eq. 10</xref> that included all causal SNPs. We systematically varied the genetic effect <italic>&#x3b2;</italic> and the proportion of causal SNPs in the two causal genes. The sample size <italic>N</italic> &#x3d; 1,000; the cases and controls were balanced for binary traits.</p>
<p>We considered accuracy by the sensitivity, specificity, and balanced accuracy (BA, the average of sensitivity and specificity) based on the true positives (TP, the picked SNPs that are causal), false positives (FP, the picked SNPs that are non-causal), true negatives (TN, the unpicked SNPs that are non-causal), and false negatives (FN, the unpicked SNPs that are causal). These numbers are determined after defining the &#x201c;picked&#x201d; and &#x201c;unpicked&#x201d; SNPs. At the screening stage of the oTFisher procedure described in Section 2.2, we consider the <italic>s</italic> candidate SNPs as being picked from in total <italic>t</italic> SNPs in the significant genes; the rest <italic>t</italic> &#x2212; <italic>s</italic> SNPs are unpicked. At the validation stage, the <italic>s</italic>
<sub>1</sub> validated SNPs are picked, and the rest <italic>t</italic> &#x2212; <italic>s</italic>
<sub>1</sub> SNPs in the significant genes are unpicked. The accuracy measures were averaged over 1,000 simulations.</p>
<p>The oTFisher procedure was compared with Bonferroni and BH procedures for SNP screening. We further considered an oracle procedure:<list list-type="simple">
<list-item>
<p>&#x2022; Oracle procedure: Assuming the number of causal SNPs <italic>m</italic> is known, the oracle (i.e., the best possible) SNP screening procedure picks SNPs by setting <italic>p</italic>
<sup>&#x22c6;</sup> &#x3d; <italic>P</italic>
<sub>(<italic>m</italic>)</sub>, the <italic>m</italic>th smallest SNP <italic>p</italic>-value.</p>
</list-item>
</list>
</p>
<p>Certainly, <italic>m</italic> is unknown in reality, so this procedure is a hypothetically optimal procedure serving as an indicative accuracy for comparison purposes. The validation process is the same for all procedures.</p>
<p>
<xref ref-type="fig" rid="F3">Figure 3</xref> compares the BA of the SNPs picked at the screening stage and at the validation stage. Overall, in both screening and validation stages, the oTFisher_r gave a higher BA than Bonferroni and the BH, sometimes even higher than the Oracle. In the screening stage, the oTFisher using <italic>p</italic><sup>&#x22c6;</sup> &#x3d; <italic>&#x3c4;</italic>&#x2a; had higher sensitivity but lower specificity. Restricting the threshold to <italic>p</italic>
<sup>&#x22c6;</sup> &#x3d; min{<italic>&#x3c4;</italic>&#x2a;, 0.1} significantly increased the specificity. The validation stage helped further control the type I error. For comparisons of sensitivity and specificity and results under more settings, see <xref ref-type="sec" rid="s12">Supplementary Figures S14&#x2013;S21</xref> in Supplementary Material.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The balanced accuracy at the SNP screening (row 1) and validation (row 2) stages under the continuous trait model. Code for methods: otfisher: the oTFisher procedure with threshold <italic>p</italic>
<sup>&#x22c6;</sup> &#x3d; <italic>&#x3c4;</italic>&#x2a;; otfisher_r, the oTFisher procedure with <italic>p</italic>
<sup>&#x22c6;</sup> &#x3d; min{<italic>&#x3c4;</italic>&#x2a;, 0.1}; ora, the oracle procedure; bon, Bonferroni procedure; BH, Benjamini&#x2013;Hochberg procedure; sig, screening stage; val, validation stage. Signal proportion is the proportion of causal SNPs in the two causal genes.</p>
</caption>
<graphic xlink:href="fgene-13-1009428-g003.tif"/>
</fig>
</sec>
</sec>
<sec id="s4">
<title>4 Real-data analysis</title>
<p>We conducted a comprehensive study of nine GWAS summary data sets from the GEFOS (<xref ref-type="bibr" rid="B11">Estrada et al., 2012</xref>; <xref ref-type="bibr" rid="B64">Zheng et al., 2015</xref>; <xref ref-type="bibr" rid="B22">Kemp et al., 2017</xref>; <xref ref-type="bibr" rid="B35">Medina-Gomez et al., 2017</xref>; <xref ref-type="bibr" rid="B36">Medina-Gomez et al., 2018</xref>; <xref ref-type="bibr" rid="B49">Trajanoska et al., 2018</xref>, <xref ref-type="bibr" rid="B50">2020</xref>; <xref ref-type="bibr" rid="B37">Morris et al., 2019</xref>). A description of these studies and data is given in Supplementary Material. Using the SNP <italic>p</italic>-values, we carried out gene and haplotype-block (haploblock) analyses for hunting putative genetic factors associated with bone mineral density (BMD) related traits and fall risk. The Supplementary Material gives details on our data pre-processing, including the pipeline to map SNPs to genes and haploblocks (<xref ref-type="bibr" rid="B14">Gabriel et al., 2002</xref>; <xref ref-type="bibr" rid="B6">Chang et al., 2015</xref>; <xref ref-type="bibr" rid="B7">Deng et al., 2016</xref>), correlation estimation using the reference panel of the 1000 Genomes Project (<xref ref-type="bibr" rid="B18">Higham, 2002</xref>; <xref ref-type="bibr" rid="B29">Lin and Zeng, 2010b</xref>), and SNP <italic>p</italic>-value adjustment based on the LD score regression (<xref ref-type="bibr" rid="B5">Bulik-Sullivan et al., 2015</xref>; <xref ref-type="bibr" rid="B23">Lee et al., 2018</xref>). For stable numerical computation without losing much associative information, SNPs with high LDs are pruned&#x2014;if a SNP pair has the LD <italic>r</italic>
<sup>2</sup> &#x3e; 0.9, the variant with the lower MAF is removed (following the default setting of PLINK&#x2019;s SNP pruning function (<xref ref-type="bibr" rid="B6">Chang et al., 2015</xref>)). Furthermore, the GEFOS2017_TBBMD data contain a large number of genome-wide significant SNPs (<italic>p</italic>-values <inline-formula id="inf44">
<mml:math id="m58">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula> 5E-8). These SNPs were removed from our SNP-set analyses for this data. The purpose is to reduce the false positive rate and study how many genes and haploblocks could still be detected by SNP-set analysis. The Q-Q plots for raw SNP <italic>p</italic>-values are given in <xref ref-type="sec" rid="s12">Supplementary Figure S22</xref>. The summary statistics on the features of SNP, genes, and haploblocks are given in <xref ref-type="sec" rid="s12">Supplementary Tables S5&#x2013;S8</xref>.</p>
<p>We extensively searched the literature and obtained comprehensive lists of 2,179 &#x201c;literature&#x201d; genes and 4,802 literature SNPs reported to be associated with osteoporosis, bone fracture, and various traits of bone mineral density (BMD). For the falling risk, we took the 16 genes reported by (<xref ref-type="bibr" rid="B50">Trajanoska et al., 2020</xref>) as literature genes since it is the only large-scale study we found regarding this trait. The searching strategies and resources are described in Supplementary Material (last update: 15 May 2022). The lists are in supplementary files literature_genes.xls and literature_snps.xlsx, including the gene and SNP information, associated phenotypes, resources, references, etc. These literature genes and SNPs are enriched with true disease genetic factors. Therefore, including literature genes and SNPs among the top hits can evidence the credible performance of a good data analysis method. At the same time, top hits that are not among these literature findings but are functionally relevant to the given trait can be reasonably considered as putative novel genetic discoveries.</p>
<sec id="s4-1">
<title>4.1 Gene-based analysis</title>
<p>We studied the genetic associations between genes and traits using the SNP sets grouped by genes. Four methods were applied. First, the Bonferroni procedure represents the single-SNP method applied in the original GEFOS studies. The top-hit genes contained significant SNPs with <italic>p</italic>-values less than the Bonferroni-adjusted threshold: 0.05 divided by the total number of SNPs in all genes. Second, we applied two widely used SNP-set methods using SNP <italic>p</italic>-values: the ACAT (<xref ref-type="bibr" rid="B32">Liu et al., 2019</xref>) and the GATES. Third, we applied our proposed oTFisher method with the adapting domain <inline-formula id="inf45">
<mml:math id="m59">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:msup>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mn>0.001</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.005</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.01</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.05</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.5</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.7</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The oTFisher_cct and the oTFisher_minp yielded similar results, so the former is reported below for simplicity. The Q-Q plots of the gene-based association <italic>p</italic>-values are given in Supplementary Figure S24. The top-hit genes are given in the supplementary file top_gene.xlsx. The overlaps of the top-hit genes and the top-hit literature genes among these four analysis methods are given in <xref ref-type="sec" rid="s12">Supplementary Figures S25&#x2013;S26</xref>.</p>
<p>
<xref ref-type="fig" rid="F4">Figure 4</xref> summarizes the numbers of top gene hits, from which we can make a few interesting observations. First, the Bonferroni procedure systematically led to fewer top hits and literature genes than the other methods. The result indicates that SNP-set tests could have higher statistical power than single-SNP analysis in detecting disease genes. Second, the oTFisher yielded similar or more gene hits, and most of the hits are literature genes, indicating a reliable discovery and potentially higher statistical power. In particular, the oTFisher could have the advantage of detecting polygenic genes that contain relatively dense genetic signals. For example, the oTFisher detected significantly more genes in the studies of GEFOS2017_TBBMD and GEFOS2020_FALLS, where top-hit genes often contain multiple SNPs with relatively small <italic>p</italic>-values (see <xref ref-type="sec" rid="s12">Supplementary Tables S9 and S10</xref> for the distribution of SNP <italic>p</italic>-values within these top-hit genes). The polygenic genetic architecture is possible for complex human diseases, including the BMD-related traits (<xref ref-type="bibr" rid="B22">Kemp et al., 2017</xref>; <xref ref-type="bibr" rid="B37">Morris et al., 2019</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Bar chart for top-hit genes by various SNP-set analysis methods. Note: Bonferroni procedure corresponds to zero genes for GEFOS2017_TBBMD (because genome-wide significant SNPs were removed from the gene-based analysis of this data) and GEFOS2020_FALLS (because no genome-wide significant SNPs were mapped to genes).</p>
</caption>
<graphic xlink:href="fgene-13-1009428-g004.tif"/>
</fig>
<p>We carried out gene-set enrichment analyses (GSEA) for the top-hit genes identified by oTFisher. Based on gene ontology (GO) and KEGG pathways, the analysis was conducted by KEGG Orthology-Based Annotation System intelligent version (KOBAS-i) (<xref ref-type="bibr" rid="B4">Bu et al., 2021</xref>). The analysis identified significant GO terms and biological pathways enriched in the top-hit genes at the corrected significance level of 0.05. The GEFOS data-specific results are summarized in supplementary files top_GOs_pathways_study-specific.xlsx. These GO terms and biological pathways are often related to the bone and skeletal system and are consistent with the osteoporosis pathways reported in literature (<xref ref-type="bibr" rid="B17">Guo et al., 2019</xref>). Furthermore, we considered the BMD as a general trait and carried out the GSEA by pooling 173 top-hit genes from eight studies (except for falling risk). Twenty-one significantly enriched pathways were obtained and clustered into three networks based on their correlations by the cirFunMap plot (<xref ref-type="bibr" rid="B4">Bu et al., 2021</xref>). <xref ref-type="fig" rid="F5">Figure 5</xref> visualizes the clusters (ranked by the median <italic>p</italic>-value of the enriched pathways within each cluster). The first cluster gives a major network containing 12 pathways: Wnt signaling pathway (hsa04310), breast cancer (hsa05224), hepatocellular carcinoma (hsa05225), pathways in cancer (hsa05200), gastric cancer (hsa05226), basal cell carcinoma (hsa05217), signaling pathways regulating pluripotency of stem cells (hsa04550), proteoglycans in cancer (hsa05205), hippo signaling pathway (hsa04390), human papillomavirus infection (hsa05165), Cushing syndrome (hsa04934), and mTOR signaling pathway (hsa04150). The top two significant pathways, the Wnt signaling and breast cancer pathways, have been reported in literature (<xref ref-type="bibr" rid="B17">Guo et al., 2019</xref>). 
The large cluster here provides a networking context for them. The second cluster, containing pancreatic cancer (hsa05212) and colorectal cancer (hsa05210), is also connected with the first cluster. The third cluster, containing the prolactin signaling pathway (hsa04917) and rheumatoid arthritis (hsa05323), is independent of the rest. Details of the significant pathways and their clusters for the BMD traits are given in supplementary files top_pathways_BMDs.xlsx.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>The cirFunMap plot of the pathway network based on 173 top-hit genes from BMD-related studies. Above: The barplot of the enrichment ratio, defined as the number of top-hit genes in the pathway over the total number of genes in the pathway. Below: The circular network plot. The node color represents different clusters. The node size represents the level of the <italic>p</italic>-value; from small to large: [0.01, 0.05), [0.001, 0.01), [0.0001, 0.001), and [1e-10, 0.0001). An edge represents a correlation larger than the default threshold of 0.35.</p>
</caption>
<graphic xlink:href="fgene-13-1009428-g005.tif"/>
</fig>
<p>The top-hit novel genes included in the enriched GO terms and pathways are likely disease genes that influence the corresponding functionalities. In particular, we obtained three top-hit novel genes in the 21 significantly enriched pathways obtained by pooling 173 top-hit genes of the BMD traits. Gene <italic>HSPG2</italic> (chr1: 22148724&#x2013;22263790, oTFisher <italic>p</italic>-value 1.18E-07) is included in a significantly enriched pathway of proteoglycans in cancer (hsa05205, corrected enrichment <italic>p</italic>-value 0.0047). It was shown to be associated with segregating developmental dysplasia of the hip (<xref ref-type="bibr" rid="B2">Basit et al., 2017</xref>). Gene <italic>MAP3K12</italic> (chr12: 53874275&#x2013;53893444, oTFisher <italic>p</italic>-value 6.49E-09) is included in the significantly enriched MAPK signaling pathway (hsa04010, corrected enrichment <italic>p</italic>-value 0.0048). It is related to lissencephaly type 3 - metacarpal bone dysplasia and infantile osteopetrosis with neuroaxonal dysplasia in the Open Targets Genetics (<xref ref-type="bibr" rid="B15">Ghoussaini et al., 2021</xref>). Gene <italic>PRKAG1</italic> (chr12: 49396054&#x2013;49412629, oTFisher <italic>p</italic>-value 2.22E-06) is included in the significantly enriched Apelin signaling pathway (hsa04371, corrected enrichment <italic>p</italic>-value 0.0049). It is related to bone marrow failure syndrome in the Open Targets Genetics. Novel top-hit genes contained in the enriched GOs and pathways from GEFOS data-specific results are summarized in the supplementary file novel_genes_in_top_GOs_pathways_study-specific.xlsx. More discussion of them is given in Supplementary Material, including genes connected to relevant traits such as osteoarthritis, osteosarcoma, and bone metastasis.</p>
</sec>
<sec id="s4-2">
<title>4.2 Haplotype block-based analysis</title>
<p>Gene-based analysis has the limitation of a small coverage of the genome. For a whole-genome association study, we grouped and analyzed SNPs by haplotype blocks (haploblock estimation by PLINK (<xref ref-type="bibr" rid="B6">Chang et al., 2015</xref>) is detailed in Supplementary Material). The Q-Q plots and the Manhattan plots are given in <xref ref-type="sec" rid="s12">Supplementary Figures S27 and S28</xref> in Supplementary Material. Overall, genomic inflation is reasonably controlled. For various SNP-set methods, <xref ref-type="fig" rid="F6">Figure 6</xref> shows the number of top-hit blocks. Compared with other methods, the oTFisher generated more top-hit blocks and novel blocks (i.e., top-hit blocks that do not overlap literature genes or SNPs). More details on the top-hit blocks and their corresponding SNPs and genes are summarized in the supplementary file top_haploblocks.xlsx.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Bar chart for top-hit blocks by various tests. Literature blocks are those mapped to literature genes or containing literature SNPs. Genome-wide significant SNPs were removed from testing the data of GEFOS2017_TBBMD. There are no detections for ACAT and GATES for the 2017_TBBMD and 2020_FALLS data because they had no haploblock <italic>p</italic>-values surpassing the significance level defined by 0.05 over the number of blocks.</p>
</caption>
<graphic xlink:href="fgene-13-1009428-g006.tif"/>
</fig>
<p>The haploblock-based analysis provided complementary results to the single-SNP analysis and gene-based analysis. As a SNP-set analysis method, haploblock analysis could detect additional disease SNPs over single-SNP analysis. For example, in the study GEFOS2012_FN, the top-hit blocks by the oTFisher discovered 56 literature SNPs that the single-SNP analysis failed to detect (since their <italic>p</italic>-values do not pass the genome-wide significance level). The haploblock analysis could also map additional disease genes over the gene-based analysis. <xref ref-type="sec" rid="s12">Supplementary Table S11</xref> summarizes the numbers of top-hit blocks that can map to literature or novel genes. According to the results, the haploblock analysis found some literature genes that were not among the top hits of the gene-based analysis. For example, in the study GEFOS2012_FN, six literature genes were mapped by top-hit haploblocks but were not discovered by the gene-based analysis: <italic>ATXN7L3</italic>, <italic>AXIN1</italic>, <italic>CPED1</italic>, <italic>FUBP3</italic>, <italic>LOC100272217</italic>, and <italic>SOX6</italic>. For all GEFOS studies, we had 27 literature genes and 119 literature SNPs (63 of them are outside gene regions) detected by haploblock analysis but not by gene-based analysis. Supplementary Figure S29 lists the numbers of literature genes and SNPs found by haploblock analysis versus gene-based analysis. Furthermore, the top-hit blocks (including single-SNP blocks) contained all GEFOS-reported significant SNPs, indicating no information lost compared to the original GEFOS studies.</p>
<p>The top-hit blocks from eight BMD studies contain 286 novel blocks; 255 of them are outside of genes (detailed information is given in the supplementary file novel_haploblocks.xlsx). By epigenetic annotation (Haploreg v3 (<xref ref-type="bibr" rid="B51">Ward and Kellis, 2016</xref>)), 58 of the novel blocks (representing 43 non-overlapping loci) co-locate with strong enhancers of literature genes. Therefore, these novel blocks are of interest due to their functional connections. For example, a top-hit block chr2:54643778-54645650 (oTFisher <italic>p</italic>-value 1.67E-16 by GEFOS2012_LS data) contains three SNPs, rs13393949 (<italic>p</italic>-value 2.12E-06), rs4671215 (<italic>p</italic>-value 9.36E-08), and rs7560205 (<italic>p</italic>-value 1.72E-05), that are located at strong enhancers of gene <italic>SPTBN1</italic> in the cell types Huvec (umbilical vein endothelial cells) and NHEK (epidermal keratinocytes). Gene <italic>SPTBN1</italic> has been shown to be associated with heel bone mineral density (<xref ref-type="bibr" rid="B22">Kemp et al., 2017</xref>). The enhancer-located novel blocks are given in the supplementary file novel_haploblocks_enhancers_literaturegene.xlsx, and an extensive discussion of the related SNPs is given in Supplementary Material.</p>
</sec>
<sec id="s4-3">
<title>4.3 Screening SNPs</title>
<p>Real-data analysis results show that the SNP-screening procedure by the oTFisher_r could likely yield more disease SNPs than the Bonferroni procedure. Specifically, we used GEFOS2012 data sets to collect screened SNPs and validated them using a large osteoporosis data set from the UK Biobank (15,133 cases and 426,942 controls aged 38&#x2013;73&#xa0;years) (<xref ref-type="bibr" rid="B46">Sudlow et al., 2015</xref>). <xref ref-type="fig" rid="F7">Figure 7</xref> shows the Venn diagrams of the screened SNPs by the oTFisher_r and the Bonferroni procedures in the haploblock-based analyses, which are also compared with the SNPs reported in the original GEFOS studies and the literature SNPs. For consistent comparison, the validated SNPs were defined by the significance level of 0.05 over the total number of unique SNPs from the screening step and the literature. As expected, all GEFOS-reported SNPs were contained in the sets of literature SNPs as well as the screened SNPs. The oTFisher_r replicated more literature SNPs than the Bonferroni both before and after the validation stage. For example, with GEFOS2012_FN data, all screened SNPs by the Bonferroni were included in the set of SNPs by the oTFisher_r, while the oTFisher_r screened 30 additional literature SNPs (among which five were validated). Therefore, the oTFisher_r likely has a higher chance of finding disease SNPs. Furthermore, over 30% of the screened SNPs by oTFisher_r were validated. The high validation percentage (compared to the expected percentage of no more than 5% under the null) indicates that the set of screened SNPs by oTFisher_r likely contains enriched disease SNPs. Consistent results for gene-based analysis are given in <xref ref-type="sec" rid="s12">Supplementary Figures S30 and S31</xref> in Supplementary Material.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Overlap of the screened SNPs in haploblock-based analysis (row 1) and validated SNPs (row 2) based on the oTFisher_r, Bonferroni procedure, original GEFOS study, and literature. SNP screening is done by GEFOS2012_FN (left column) and GEFOS2012_LS (right column); validation is done by the UK Biobank data.</p>
</caption>
<graphic xlink:href="fgene-13-1009428-g007.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s5">
<title>5 Discussion</title>
<p>GWAS summary data is a rich resource for hunting genetic factors associated with the susceptibility of human complex diseases. To facilitate analyzing such data, in this paper, we propose a SNP <italic>p</italic>-value combination test, the oTFisher, which has robustly high statistical power through adapting to unknown patterns of genetic effects. We develop computationally efficient algorithms to calculate the <italic>p</italic>-value of the oTFisher, which account for the LD of the SNPs. One advantage of such <italic>p</italic>-value combination tests is that they do not assume a special type of SNP statistics. In principle, the same calculation can be carried out as long as the correlations among the SNP statistics can be estimated.</p>
<p>As given in <xref ref-type="disp-formula" rid="e2">Eq. 2</xref>, the oTFisher&#x2019;s truncating and weighting scheme for SNP <italic>p</italic>-values maximizes the significance of the potential underlying genetic effects (through minimizing the corresponding TFisher&#x2019;s <italic>p</italic>-value). With well-controlled type I error rate of the oTFisher, this automatic truncating scheme could serve as a vehicle for screening important SNPs that contribute to the overall association of the given SNP set. Results show that this screening procedure could better identify disease SNPs than the traditional Bonferroni and FDR procedures. Meanwhile, because the screening procedure is relatively liberal, validating these screened SNPs using an independent high-quality data set is critical for controlling false positives. Furthermore, one should always be cautious about interpreting the screened SNPs in the sense that statistical association does not necessarily mean causality. The associated SNPs could be due to LD with causal SNPs or even confounding effects. Nevertheless, the oTFisher provides a new way of exploring important SNPs not from their individual perspective but from the combined effects of the group as a whole.</p>
<p>We applied the oTFisher to a comprehensive study of osteoporosis-related traits using GEFOS data. Besides demonstrating the merit of the new method, we also generated novel genes and haploblocks that could benefit the downstream study of osteoporosis genetics. Further biological validations of these results are desired.</p>
<p>Our GEFOS data analysis focused on gene and haplotype block-based SNP-grouping strategies for simple biological interpretability. Based on the data-adaptive omnibus testing principle, the oTFisher can also be extended to other SNP-grouping as well as annotation-weighting strategies in whole genome sequencing studies, especially for studying the noncoding regions, following the ideas proposed in recent literature (<xref ref-type="bibr" rid="B38">Morrison et al., 2017</xref>; <xref ref-type="bibr" rid="B25">Li et al., 2020</xref>, <xref ref-type="bibr" rid="B26">2019</xref>, <xref ref-type="bibr" rid="B27">2022</xref>).</p>
<p>In general, the quality of GWAS summary data analysis highly depends on the quality of the input data. For example, if the SNP <italic>p</italic>-values are inflated, the subsequent SNP-set testing results will also be inflated. Current inflation correction procedures could partially address the problem but are still limited. Further research in this direction is needed. Indeed, high-quality data is essential; we highly appreciate data-generating studies providing high-quality summary data for the sake of both primary and secondary data analyses.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s12">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7">
<title>Ethics statement</title>
<p>Ethical review and approval was not required for the study on human participants in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required for this study in accordance with the national legislation and the institutional requirements. Written informed consent was not obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec id="s8">
<title>Author contributions</title>
<p>XC implemented the method and data analysis and drafted the manuscript. HZ conceived the idea and implemented the method. ML was involved in the data analysis. H-WD conceived the idea and was involved in the data analysis. ZW conceived the idea, designed the study, and drafted the manuscript. All the authors read and approved the final manuscript.</p>
</sec>
<sec id="s9">
<title>Funding</title>
<p>This work was supported in part by NSF grant DMS-2113570 (to ZW).</p>
</sec>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of interest</title>
<p>HZ was employed by Pfizer Inc.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2022.1009428/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2022.1009428/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet2.PDF" id="SM1" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet1.ZIP" id="SM2" mimetype="application/ZIP" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barnett</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Mukherjee</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>The generalized higher criticism for testing SNP-set effects in genetic association studies</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>112</volume>, <fpage>64</fpage>&#x2013;<lpage>76</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.2016.1192039</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Basit</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Albalawi</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Alharby</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Khoshhal</surname>
<given-names>K. I.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Exome sequencing identified rare variants in genes HSPG2 and ATP2B4 in a family segregating developmental dysplasia of the hip</article-title>. <source>BMC Med. Genet.</source> <volume>18</volume>, <fpage>34</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1186/s12881-017-0393-8</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brown</surname>
<given-names>M. B.</given-names>
</name>
</person-group> (<year>1975</year>). <article-title>400: A method for combining non-independent, one-sided tests of significance</article-title>. <source>Biometrics</source>. <volume>31</volume> (<issue>4</issue>), <fpage>987</fpage>&#x2013;<lpage>992</lpage>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Huo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>KOBAS-i: Intelligent prioritization and exploratory visualization of biological functions for gene enrichment analysis</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume>, <fpage>W317</fpage>&#x2013;<lpage>W325</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkab447</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bulik-Sullivan</surname>
<given-names>B. K.</given-names>
</name>
<name>
<surname>Loh</surname>
<given-names>P.-R.</given-names>
</name>
<name>
<surname>Finucane</surname>
<given-names>H. K.</given-names>
</name>
<name>
<surname>Ripke</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Patterson</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>LD score regression distinguishes confounding from polygenicity in genome-wide association studies</article-title>. <source>Nat. Genet.</source> <volume>47</volume>, <fpage>291</fpage>&#x2013;<lpage>295</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3211</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Chow</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Tellier</surname>
<given-names>L. C.</given-names>
</name>
<name>
<surname>Vattikuti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Purcell</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>J. J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Second-generation PLINK: Rising to the challenge of larger and richer datasets</article-title>. <source>Gigascience</source> <volume>4</volume>, <fpage>7</fpage>&#x2013;<lpage>015</lpage>. <pub-id pub-id-type="doi">10.1186/s13742-015-0047-8</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname>
<given-names>J.-E.</given-names>
</name>
<name>
<surname>Sham</surname>
<given-names>P. C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.-X.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>SNPTracker: A swift tool for comprehensive tracking and unifying dbSNP rs IDs and genomic coordinates of massive sequence variants</article-title>. <source>G3</source> <volume>6</volume>, <fpage>205</fpage>&#x2013;<lpage>207</lpage>. <pub-id pub-id-type="doi">10.1534/g3.115.021832</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dey</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>E. M.</given-names>
</name>
<name>
<surname>Abecasis</surname>
<given-names>G. R.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A fast and accurate algorithm to test for binary phenotypes and its application to PheWAS</article-title>. <source>Am. J. Hum. Genet.</source> <volume>101</volume>, <fpage>37</fpage>&#x2013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2017.05.014</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Donoho</surname>
<given-names>D. L.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Higher criticism for detecting sparse heterogeneous mixtures</article-title>. <source>Ann. Stat.</source> <volume>32</volume>, <fpage>962</fpage>&#x2013;<lpage>994</lpage>. <pub-id pub-id-type="doi">10.1214/009053604000000265</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dudbridge</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Koeleman</surname>
<given-names>B. P.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Rank truncated product of <italic>p</italic>-values, with application to genomewide association scans</article-title>. <source>Genet. Epidemiol.</source> <volume>25</volume>, <fpage>360</fpage>&#x2013;<lpage>366</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.10264</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Estrada</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Styrkarsdottir</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Evangelou</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>Y.-H.</given-names>
</name>
<name>
<surname>Duncan</surname>
<given-names>E. L.</given-names>
</name>
<name>
<surname>Ntzani</surname>
<given-names>E. E.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Genome-wide meta-analysis identifies 56 bone mineral density loci and reveals 14 loci associated with risk of fracture</article-title>. <source>Nat. Genet.</source> <volume>44</volume>, <fpage>491</fpage>&#x2013;<lpage>501</lpage>. <pub-id pub-id-type="doi">10.1038/ng.2249</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Evangelou</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ioannidis</surname>
<given-names>J. P.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Meta-analysis methods for genome-wide association studies and beyond</article-title>. <source>Nat. Rev. Genet.</source> <volume>14</volume>, <fpage>379</fpage>&#x2013;<lpage>389</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3472</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Fisher</surname>
<given-names>R. A.</given-names>
</name>
</person-group> (<year>1925</year>). <source>Statistical methods for research workers</source>. <edition>1st edn</edition>. <publisher-loc>Edinburgh</publisher-loc>: <publisher-name>Oliver &#x26; Boyd</publisher-name>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gabriel</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Schaffner</surname>
<given-names>S. F.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Roy</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Blumenstiel</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2002</year>). <article-title>The structure of haplotype blocks in the human genome</article-title>. <source>Science</source> <volume>296</volume>, <fpage>2225</fpage>&#x2013;<lpage>2229</lpage>. <pub-id pub-id-type="doi">10.1126/science.1069424</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghoussaini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mountjoy</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Carmona</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Peat</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>E. M.</given-names>
</name>
<name>
<surname>Hercules</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Open targets genetics: Systematic identification of trait-associated genes using large-scale genetics and functional genomics</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume>, <fpage>D1311</fpage>&#x2013;<lpage>D1320</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa840</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Powerful and efficient snp-set association tests across multiple phenotypes using gwas summary data</article-title>. <source>Bioinformatics</source> <volume>35</volume>, <fpage>1366</fpage>&#x2013;<lpage>1372</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty811</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lv</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Pathway and network analysis of genes related to osteoporosis</article-title>. <source>Mol. Med. Rep.</source> <volume>20</volume>, <fpage>985</fpage>&#x2013;<lpage>994</lpage>. <pub-id pub-id-type="doi">10.3892/mmr.2019.10353</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Higham</surname>
<given-names>N. J.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Computing the nearest correlation matrix&#x2014;A problem from finance</article-title>. <source>IMA J. Numer. Analysis</source> <volume>22</volume>, <fpage>329</fpage>&#x2013;<lpage>343</lpage>. <pub-id pub-id-type="doi">10.1093/imanum/22.3.329</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoh</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wille</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ott</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Trimming, weighting, and grouping SNPs in human case-control association studies</article-title>. <source>Genome Res.</source> <volume>11</volume>, <fpage>2115</fpage>&#x2013;<lpage>2119</lpage>. <pub-id pub-id-type="doi">10.1101/gr.204001</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>Y.-J.</given-names>
</name>
<name>
<surname>Berndt</surname>
<given-names>S. I.</given-names>
</name>
<name>
<surname>Gustafsson</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ganna</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>M&#xe4;gi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wheeler</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Meta-analysis of gene-level associations for rare variants based on single-variant statistics</article-title>. <source>Am. J. Hum. Genet.</source> <volume>93</volume>, <fpage>236</fpage>&#x2013;<lpage>248</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2013.06.011</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ke</surname>
<given-names>Z. T.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Evolution, regulation, and function of N-terminal variable region of troponin T: Modulation of muscle contractility and beyond</article-title>. <source>Int. Rev. Cell Mol. Biol.</source> <volume>26</volume>, <fpage>1</fpage>&#x2013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1016/bs.ircmb.2015.09.002</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kemp</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Morris</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Medina-Gomez</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Forgetta</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Warrington</surname>
<given-names>N. M.</given-names>
</name>
<name>
<surname>Youlten</surname>
<given-names>S. E.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Identification of 153 new loci associated with heel bone mineral density and functional involvement of GPC6 in osteoporosis</article-title>. <source>Nat. Genet.</source> <volume>49</volume>, <fpage>1468</fpage>&#x2013;<lpage>1475</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3949</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>McGue</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Iacono</surname>
<given-names>W. G.</given-names>
</name>
<name>
<surname>Chow</surname>
<given-names>C. C.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The accuracy of LD score regression as an estimator of confounding and genetic correlations in genome-wide association studies</article-title>. <source>Genet. Epidemiol.</source> <volume>42</volume>, <fpage>783</fpage>&#x2013;<lpage>795</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.22161</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>M.-X.</given-names>
</name>
<name>
<surname>Gui</surname>
<given-names>H.-S.</given-names>
</name>
<name>
<surname>Kwan</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Sham</surname>
<given-names>P. C.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>GATES: A rapid and powerful gene-based association test using extended Simes procedure</article-title>. <source>Am. J. Hum. Genet.</source> <volume>88</volume>, <fpage>283</fpage>&#x2013;<lpage>293</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2011.01.019</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Gaynor</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Dynamic incorporation of multiple <italic>in silico</italic> functional annotations empowers rare variant association analysis of large whole-genome sequencing studies at scale</article-title>. <source>Nat. Genet.</source> <volume>52</volume>, <fpage>969</fpage>&#x2013;<lpage>983</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-020-0676-4</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Dynamic scan procedure for detecting rare-variant association regions in whole-genome sequencing studies</article-title>. <source>Am. J. Hum. Genet.</source> <volume>104</volume>, <fpage>802</fpage>&#x2013;<lpage>814</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2019.03.002</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Simultaneous detection of signal regions using quadratic scan statistics with applications to whole genome association studies</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>117</volume>, <fpage>823</fpage>&#x2013;<lpage>834</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.2020.1822849</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>D.-Y.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2010a</year>). <article-title>Meta-analysis of genome-wide association studies: No efficiency gain in using individual participant data</article-title>. <source>Genet. Epidemiol.</source> <volume>34</volume>, <fpage>60</fpage>&#x2013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.20435</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>D.-Y.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2010b</year>). <article-title>On the relative efficiency of using summary statistics versus individual-level data in meta-analysis</article-title>. <source>Biometrika</source> <volume>97</volume>, <fpage>321</fpage>&#x2013;<lpage>332</lpage>. <pub-id pub-id-type="doi">10.1093/biomet/asq006</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Littell</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Folks</surname>
<given-names>J. L.</given-names>
</name>
</person-group> (<year>1971</year>). <article-title>Asymptotic optimality of Fisher&#x2019;s method of combining independent tests</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>66</volume>, <fpage>802</fpage>&#x2013;<lpage>806</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.1971.10482347</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Littell</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Folks</surname>
<given-names>J. L.</given-names>
</name>
</person-group> (<year>1973</year>). <article-title>Asymptotic optimality of Fisher&#x2019;s method of combining independent tests II</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>68</volume>, <fpage>193</fpage>&#x2013;<lpage>194</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.1973.10481362</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Morrison</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Boerwinkle</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>ACAT: A fast and powerful p value combination method for rare-variant analysis in sequencing studies</article-title>. <source>Am. J. Hum. Genet.</source> <volume>104</volume>, <fpage>410</fpage>&#x2013;<lpage>421</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2019.01.002</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Cauchy combination test: A powerful test with analytic p-value calculation under arbitrary dependency structures</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>115</volume>, <fpage>393</fpage>&#x2013;<lpage>402</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.2018.1554485</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>McCullagh</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Nelder</surname>
<given-names>J. A.</given-names>
</name>
</person-group> (<year>1989</year>). <source>Generalized linear models</source>. <edition>2nd edn</edition>. <publisher-loc>Florida</publisher-loc>: <publisher-name>CRC Press LLC</publisher-name>.</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Medina-Gomez</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kemp</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Dimou</surname>
<given-names>N. L.</given-names>
</name>
<name>
<surname>Kreiner</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Chesi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zemel</surname>
<given-names>B. S.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Bivariate genome-wide association meta-analysis of pediatric musculoskeletal traits reveals pleiotropic effects at the SREBF1/TOM1L2 locus</article-title>. <source>Nat. Commun.</source> <volume>8</volume>, <fpage>121</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-017-00108-3</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Medina-Gomez</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kemp</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Trajanoska</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Luan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chesi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ahluwalia</surname>
<given-names>T. S.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Life-course genome-wide association study meta-analysis of total body BMD and assessment of age-specific effects</article-title>. <source>Am. J. Hum. Genet.</source> <volume>102</volume>, <fpage>88</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2017.12.005</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Morris</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Kemp</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Youlten</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Laurent</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Logan</surname>
<given-names>J. G.</given-names>
</name>
<name>
<surname>Chai</surname>
<given-names>R. C.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>An atlas of genetic influences on osteoporosis in humans and mice</article-title>. <source>Nat. Genet.</source> <volume>51</volume>, <fpage>258</fpage>&#x2013;<lpage>266</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-018-0302-x</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Morrison</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Metcalf</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ballantyne</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Practical approaches for whole-genome sequence analysis of heart- and blood-related traits</article-title>. <source>Am. J. Hum. Genet.</source> <volume>100</volume>, <fpage>205</fpage>&#x2013;<lpage>215</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2016.12.009</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="book">
<collab>NIH</collab> (<year>2018</year>). <source>Update to NIH management of genomic summary results access</source>. <comment>
<ext-link ext-link-type="uri" xlink:href="https://grants.nih.gov/grants/guide/notice-files/not-od-19-023.html">https://grants.nih.gov/grants/guide/notice-files/not-od-19-023.html</ext-link>
</comment>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Asymptotic tests of association with multiple SNPs in linkage disequilibrium</article-title>. <source>Genet. Epidemiol.</source> <volume>33</volume>, <fpage>497</fpage>&#x2013;<lpage>507</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.20402</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pasaniuc</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Price</surname>
<given-names>A. L.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Dissecting the genetics of complex traits using summary association statistics</article-title>. <source>Nat. Rev. Genet.</source> <volume>18</volume>, <fpage>117</fpage>&#x2013;<lpage>127</lpage>. <pub-id pub-id-type="doi">10.1038/nrg.2016.142</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Patel</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Read</surname>
<given-names>C. B.</given-names>
</name>
</person-group> (<year>1996</year>). <source>Handbook of the normal distribution</source>. <edition>2nd edn</edition>. <publisher-loc>New York</publisher-loc>: <publisher-name>Marcel Dekker</publisher-name>.</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schaid</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Rowland</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Tines</surname>
<given-names>D. E.</given-names>
</name>
<name>
<surname>Jacobson</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Poland</surname>
<given-names>G. A.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Score tests for association between traits and haplotypes when linkage phase is ambiguous</article-title>. <source>Am. J. Hum. Genet.</source> <volume>70</volume>, <fpage>425</fpage>&#x2013;<lpage>434</lpage>. <pub-id pub-id-type="doi">10.1086/338688</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Shao</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2010</year>). <source>Mathematical statistics</source>. <edition>2nd edn</edition>. <publisher-name>Springer-Verlag</publisher-name>.</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shlyakhter</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Sabeti</surname>
<given-names>P. C.</given-names>
</name>
<name>
<surname>Schaffner</surname>
<given-names>S. F.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Cosi2: An efficient simulator of exact and approximate coalescent with selection</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>3427</fpage>&#x2013;<lpage>3429</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu562</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sudlow</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gallacher</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Beral</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Burton</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Danesh</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>UK Biobank: An open access resource for identifying the causes of a wide range of complex diseases of middle and old age</article-title>. <source>PLoS Med.</source> <volume>12</volume>, <fpage>e1001779</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pmed.1001779</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hui</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bader</surname>
<given-names>G. D.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Kraft</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Powerful gene set analysis in GWAS with the generalized Berk-Jones statistic</article-title>. <source>PLoS Genet.</source> <volume>15</volume>, <fpage>e1007530</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1007530</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Genetic variant set-based tests using the generalized Berk&#x2013;Jones statistic with application to a genome-wide association study of breast cancer</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>115</volume>, <fpage>1079</fpage>&#x2013;<lpage>1091</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.2019.1660170</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Trajanoska</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Morris</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Oei</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>H.-F.</given-names>
</name>
<name>
<surname>Evans</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Kiel</surname>
<given-names>D. P.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Assessment of the genetic and clinical determinants of fracture risk: Genome wide association and Mendelian randomisation study</article-title>. <source>BMJ</source> <volume>362</volume>, <fpage>k3225</fpage>. <pub-id pub-id-type="doi">10.1136/bmj.k3225</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Trajanoska</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Seppala</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Medina-Gomez</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>Y.-H.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>van Schoor</surname>
<given-names>N. M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Genetic basis of falling risk susceptibility in the UK Biobank study</article-title>. <source>Commun. Biol.</source> <volume>3</volume>, <fpage>543</fpage>&#x2013;<lpage>610</lpage>. <pub-id pub-id-type="doi">10.1038/s42003-020-01256-x</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ward</surname>
<given-names>L. D.</given-names>
</name>
<name>
<surname>Kellis</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>HaploReg v4: Systematic mining of putative causal variants, cell types, regulators and target genes for human complex traits and disease</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume>, <fpage>D877</fpage>&#x2013;<lpage>D881</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv1340</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Kraft</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Epstein</surname>
<given-names>M. P.</given-names>
</name>
<name>
<surname>Taylor</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Chanock</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Hunter</surname>
<given-names>D. J.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Powerful SNP-set analysis for case-control genome-wide association studies</article-title>. <source>Am. J. Hum. Genet.</source> <volume>86</volume>, <fpage>929</fpage>&#x2013;<lpage>942</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2010.05.002</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Boehnke</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Rare-variant association testing for sequencing data with the sequence kernel association test</article-title>. <source>Am. J. Hum. Genet.</source> <volume>89</volume>, <fpage>82</fpage>&#x2013;<lpage>93</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2011.05.029</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cho</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Detection boundary and Higher Criticism approach for sparse and weak genetic effects</article-title>. <source>Ann. Appl. Statistics</source> <volume>8</volume>, <fpage>824</fpage>&#x2013;<lpage>851</lpage>.</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Boerwinkle</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Generalized <italic>T</italic><sup>2</sup> test for genome association studies</article-title>. <source>Am. J. Hum. Genet.</source> <volume>70</volume>, <fpage>1257</fpage>&#x2013;<lpage>1268</lpage>. <pub-id pub-id-type="doi">10.1086/340392</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Bergen</surname>
<given-names>A. W.</given-names>
</name>
<name>
<surname>Pfeiffer</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Rosenberg</surname>
<given-names>P. S.</given-names>
</name>
<name>
<surname>Caporaso</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Pathway analysis by adaptive combination of P-values</article-title>. <source>Genet. Epidemiol.</source> <volume>33</volume>, <fpage>700</fpage>&#x2013;<lpage>709</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.20422</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zaykin</surname>
<given-names>D. V.</given-names>
</name>
<name>
<surname>Zhivotovsky</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Czika</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Shao</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wolfinger</surname>
<given-names>R. D.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Combining <italic>p</italic>-values in large-scale genomics experiments</article-title>. <source>Pharm. Stat.</source> <volume>6</volume>, <fpage>217</fpage>&#x2013;<lpage>226</lpage>. <pub-id pub-id-type="doi">10.1002/pst.304</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zaykin</surname>
<given-names>D. V.</given-names>
</name>
<name>
<surname>Zhivotovsky</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Westfall</surname>
<given-names>P. H.</given-names>
</name>
<name>
<surname>Weir</surname>
<given-names>B. S.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Truncated product method for combining <italic>p</italic>-values</article-title>. <source>Genet. Epidemiol.</source> <volume>22</volume>, <fpage>170</fpage>&#x2013;<lpage>185</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.0042</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2020a</year>). <article-title>Distributions and power of optimal signal-detection statistics in finite case</article-title>. <source>IEEE Trans. Signal Process.</source> <volume>68</volume>, <fpage>1021</fpage>&#x2013;<lpage>1033</lpage>. <pub-id pub-id-type="doi">10.1109/tsp.2020.2967179</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A fast and accurate approximation to the distributions of quadratic forms of Gaussian variables</article-title>. <source>J. Comput. Graph. Statistics</source> <volume>31</volume>, <fpage>304</fpage>&#x2013;<lpage>311</lpage>. <pub-id pub-id-type="doi">10.1080/10618600.2021.2000423</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Tong</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Landers</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2020b</year>). <article-title>TFisher: A powerful truncation and weighting procedure for combining <italic>p</italic>-values</article-title>. <source>Ann. Appl. Statistics</source> <volume>14</volume>, <fpage>178</fpage>&#x2013;<lpage>201</lpage>.</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2022a</year>). <article-title>The general goodness-of-fit tests for correlated data</article-title>. <source>Comput. Statistics Data Analysis</source> <volume>167</volume>, <fpage>107379</fpage>. <pub-id pub-id-type="doi">10.1016/j.csda.2021.107379</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2022b</year>). <article-title>The generalized Fisher&#x2019;s combination and accurate <italic>p</italic>-value calculation under dependence</article-title>. <source>Biometrics</source>. <pub-id pub-id-type="doi">10.1111/biom.13634</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>H.-F.</given-names>
</name>
<name>
<surname>Forgetta</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>Y.-H.</given-names>
</name>
<name>
<surname>Estrada</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Rosello-Diez</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Leo</surname>
<given-names>P. J.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Whole-genome sequencing identifies EN1 as a determinant of bone density and fracture</article-title>. <source>Nature</source> <volume>526</volume>, <fpage>112</fpage>&#x2013;<lpage>117</lpage>. <pub-id pub-id-type="doi">10.1038/nature14878</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>