<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fgene.2021.596794</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Identifying Breast Cancer-Related Genes Based on a Novel Computational Framework Involving KEGG Pathways and PPI Network Modularity</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Yan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1153630/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Xiang</surname> <given-names>Ju</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/817896/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Tang</surname> <given-names>Liang</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Li</surname> <given-names>Jianming</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/634461/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Lu</surname> <given-names>Qingqing</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/913029/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Tian</surname> <given-names>Geng</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/875841/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>He</surname> <given-names>Bin-Sheng</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c003"><sup>&#x0002A;</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Yang</surname> <given-names>Jialiang</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/710934/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Computer Science and Engineering, Central South University</institution>, <addr-line>Changsha</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>School of Information Science and Engineering, Changsha Medical University</institution>, <addr-line>Changsha</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Academician Workstation, Changsha Medical University</institution>, <addr-line>Changsha</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Neuroscience Research Center &#x00026; Department of Basic Medical Sciences, Changsha Medical University</institution>, <addr-line>Changsha</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>Qingdao Geneis Institute of Big Data Mining and Precision Medicine</institution>, <addr-line>Qingdao</addr-line>, <country>China</country></aff>
<aff id="aff6"><sup>6</sup><institution>Geneis Beijing Co., Ltd.</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Shankar Subramaniam, University of California, San Diego, United States</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Fuhai Li, Washington University in St. Louis, United States; Andras Szilagyi, Hungarian Academy of Sciences (MTA), Hungary</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Jialiang Yang <email>yangjl&#x00040;geneis.cn</email></corresp>
<corresp id="c002">Jianming Li <email>ljmingcsu&#x00040;163.com</email></corresp>
<corresp id="c003">Bin-Sheng He <email>hbscsmu&#x00040;163.com</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Computational Genomics, a section of the journal Frontiers in Genetics</p></fn>
<fn fn-type="other" id="fn002"><p>&#x02020;These authors have contributed equally to this work</p></fn></author-notes>
<pub-date pub-type="epub">
<day>16</day>
<month>08</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>596794</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>08</month>
<year>2020</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>05</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2021 Zhang, Xiang, Tang, Li, Lu, Tian, He and Yang.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Zhang, Xiang, Tang, Li, Lu, Tian, He and Yang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license> </permissions>
<abstract><p>Complex diseases, such as breast cancer, are often caused by mutations of multiple functional genes. Identifying disease-related genes is a critical and challenging task for unveiling the biological mechanisms behind these diseases. In this study, we develop a novel computational framework to analyze the network properties of the known breast cancer&#x02013;associated genes, based on which we develop a random-walk-with-restart (RCRWR) algorithm to predict novel disease genes. Specifically, we first curated a set of breast cancer&#x02013;associated genes from the Genome-Wide Association Studies catalog and Online Mendelian Inheritance in Man database and then studied the distribution of these genes on an integrated protein&#x02013;protein interaction (PPI) network. We found that the breast cancer&#x02013;associated genes are significantly closer to each other than expected at random, which confirms the modularity property of disease genes in a PPI network as revealed by previous studies. We then retrieved PPI subnetworks spanning top breast cancer&#x02013;associated KEGG pathways and found that the distribution of these genes on the subnetworks is non-random, suggesting that these KEGG pathways are activated non-uniformly. Taking advantage of the non-random distribution of breast cancer&#x02013;associated genes, we developed an improved RCRWR algorithm to predict novel cancer genes, which integrates network reconstruction based on local random walk dynamics and subnetworks spanning KEGG pathways. Compared with the disease gene prediction without using the information from the KEGG pathways, this method has a better prediction performance on inferring breast cancer&#x02013;associated genes, and the top predicted genes are better enriched on known breast cancer&#x02013;associated gene ontologies. Finally, we performed a literature search on top predicted novel genes and found that most of them are supported at least by wet-lab experiments on cell lines. 
In summary, we propose a robust computational framework to prioritize novel breast cancer&#x02013;associated genes, which could be used for further <italic>in vitro</italic> and <italic>in vivo</italic> experimental validation.</p></abstract>
<kwd-group>
<kwd>disease-gene prediction</kwd>
<kwd>protein-protein interactions</kwd>
<kwd>KEGG pathway</kwd>
<kwd>breast cancer</kwd>
<kwd>network propagation</kwd>
</kwd-group>
<counts>
<fig-count count="9"/>
<table-count count="3"/>
<equation-count count="5"/>
<ref-count count="48"/>
<page-count count="14"/>
<word-count count="9011"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Complex diseases, such as cancers, are often caused by dysfunction of multiple genes. The pathogenic mechanism is often due to molecular abnormalities, which affect the biological function of the body through biomolecular networks, resulting in complex and diverse diseases (Taherian-Fard et al., <xref ref-type="bibr" rid="B35">2015</xref>). The gene families of RAS, MYC, ERBB, and FGFR are common proto-oncogenes (Bi et al., <xref ref-type="bibr" rid="B4">2018</xref>). Although chemoradiotherapy remains the standard treatment for some cancers, the majority of patients, who are sensitive initially, develop resistance after multiple relapses, for example, platinum resistance (Guan and Lu, <xref ref-type="bibr" rid="B19">2018</xref>). Besides this, molecular targeted therapy is expected to be more effective and less toxic compared with chemoradiotherapy. The Food and Drug Administration has approved several targeted medicines. The research and wide application of EGFR-TKI (Tyrosine kinase inhibitors) drugs, mainly including Gefitinib, Erlotinib, Icotinib, Afatinib, Dasatinib, and Osimertinib, have greatly improved the overall survival of patients with lung cancer with the <italic>EGFR</italic> gene mutation. In this case, molecular targeted therapy has brought us much closer to personalized therapy, which will improve the therapeutic effect and prognosis for patients (Colli et al., <xref ref-type="bibr" rid="B9">2017</xref>). Therefore, identifying disease-related genes is a critical and challenging task for the study of complex diseases, which can help us understand the mechanisms of diseases, identify treatment targets, and develop novel treatment strategies (Aitman, <xref ref-type="bibr" rid="B1">2002</xref>; Gill et al., <xref ref-type="bibr" rid="B17">2014</xref>).</p>
<p>Traditional approaches to identification of disease-related genes, such as linkage analysis, involve a candidate list consisting of hundreds of genes, requiring a lot of cost and time for in-depth validation (Gill et al., <xref ref-type="bibr" rid="B17">2014</xref>; Opap and Mulder, <xref ref-type="bibr" rid="B33">2017</xref>). As such, disease-gene prediction has attracted much attention in past decades, and many computational algorithms have been developed to predict disease-related genes to minimize the cost and time for the study of disease-related genes (Chen et al., <xref ref-type="bibr" rid="B6">2014</xref>; Gill et al., <xref ref-type="bibr" rid="B17">2014</xref>; Opap and Mulder, <xref ref-type="bibr" rid="B33">2017</xref>; Luo et al., <xref ref-type="bibr" rid="B30">2019a</xref>,<xref ref-type="bibr" rid="B31">b</xref>). Many studies show that genes associated with the same or similar diseases often are more similar in function than others (Goh et al., <xref ref-type="bibr" rid="B18">2007</xref>). Functionally similar genes as well as their products often have physical interactions or functional associations. At present, with the rapid development of high-throughput technology, a large number of physical and functional relationships between biomolecules have been revealed, and these form complex biomolecular networks, e.g., protein&#x02013;protein interaction (PPI) networks (Keshava Prasad et al., <xref ref-type="bibr" rid="B26">2009</xref>), gene co-expression networks, and pathway networks (Kanehisa and Goto, <xref ref-type="bibr" rid="B25">2000</xref>). It is found that a gene is more likely to be related to a disease if there exist direct physical interactions or strong functional associations between it and known disease-related genes. 
Therefore, &#x0201C;guilt by association&#x0201D; becomes a popular strategy for disease-gene prediction (Oliver, <xref ref-type="bibr" rid="B32">2000</xref>; Wu et al., <xref ref-type="bibr" rid="B42">2008</xref>; Hu et al., <xref ref-type="bibr" rid="B22">2018</xref>), and network propagation, such as random walk, has become a widely used approach for disease-gene prediction (Cowen et al., <xref ref-type="bibr" rid="B10">2017</xref>). However, the existing PPI network is still incomplete, and there is a lot of data noise. How to improve the PPI network so as to enhance the ability to predict disease genes is still a problem that needs further study.</p>
<p>Breast cancer is one of the most common malignant tumors among women all over the world. Surgery is still the preferred treatment for breast cancer. However, patients with poor systemic conditions, such as serious diseases in the main organs, are prohibited from using surgical treatment. Therefore, to expand the population that can benefit and improve the treatment effect of breast cancer patients, targeted therapy occupies the most important position in the treatment of breast cancer (Valencia et al., <xref ref-type="bibr" rid="B37">2017</xref>). To identify breast cancer&#x02013;related genes more effectively, we conduct analysis and prediction of breast cancer&#x02013;related genes based on the PPI network and KEGG pathway because PPIs are proven to be very useful in disease-gene prediction, and the physical and functional relationships between genes in the KEGG pathways are stronger and more reliable than others. After collecting disease-gene associations for breast cancer as well as many other diseases, PPIs, and KEGG pathway data, we first analyze breast cancer&#x02013;related genes from two aspects: network and enrichment analysis. Then, to enhance the ability for disease-gene prediction, we propose an improved algorithm (RCRWR), which consists of network reconstruction based on local random walk dynamics and random walk with restart. Further, we also improve the prediction ability for disease-related genes by integrating KEGG pathway data. Finally, we conduct extensive analysis for candidate genes.</p>
<p>The rest of the paper is organized as follows. Section Materials and Methods describes the materials and methods used in the study, including the improved algorithm (RCRWR) for disease-gene prediction. Section Results conducts the analysis of disease-related genes by network and enrichment analysis and then evaluates the performance of RCRWR when predicting genes related to breast cancer and other diseases. The results confirm the effectiveness of RCRWR and the important roles of KEGG pathway data in enhancing the ability of disease-gene prediction. Finally, Section Conclusion draws conclusions.</p></sec>
<sec sec-type="materials and methods" id="s2">
<title>Materials and Methods</title>
<p>Here, we first prepare the following data sets: known disease-gene associations, PPIs, and KEGG pathway data. Then, we introduce the methods for statistics of breast cancer-related genes and the improved algorithm for predicting disease-related genes.</p>
<sec>
<title>Data Sources</title>
<sec>
<title>Disease-Gene Associations</title>
<p>The disease/trait associated genes were retrieved from the National Institutes of Health Genome-Wide Association Studies (GWAS) catalog (<ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/gwas/">https://www.ebi.ac.uk/gwas/</ext-link>) (Danielle et al., <xref ref-type="bibr" rid="B11">2013</xref>) and Online Mendelian Inheritance in Man (OMIM) (<ext-link ext-link-type="uri" xlink:href="https://omim.org/">https://omim.org/</ext-link>) (Hamosh, <xref ref-type="bibr" rid="B20">2004</xref>). Some GWAS catalog disease categories are closely related but named differently by different investigators, some of which have many overlapping genes (e.g., see <xref ref-type="supplementary-material" rid="SM1">Supplementary Tables 1</xref>, <xref ref-type="supplementary-material" rid="SM1">2</xref>). It is helpful to merge the related groups of diseases. For that purpose, a hierarchical clustering of diseases is applied to cluster these diseases according to their common disease-related genes. Similar diseases in GWAS and OMIM are manually merged based on disease names. The data set was obtained from the previous study (Yang et al., <xref ref-type="bibr" rid="B45">2016</xref>).</p></sec>
<sec>
<title>PPIs</title>
<p>Among the various types of data that have been used for the prediction of disease genes, PPIs are the most widely used. The PPI network was obtained from the STRING database (<ext-link ext-link-type="uri" xlink:href="https://string-db.org">https://string-db.org</ext-link>) (von Mering et al., <xref ref-type="bibr" rid="B38">2003</xref>), which quantitatively incorporates several studies and interaction types. In this study, we consider only the undirected and weighted network.</p></sec>
<sec>
<title>KEGG Pathways</title>
<p>We downloaded the KEGG pathway data set from KEGG (Kanehisa and Goto, <xref ref-type="bibr" rid="B25">2000</xref>) (<ext-link ext-link-type="uri" xlink:href="https://www.genome.jp/">https://www.genome.jp/</ext-link>) and MSigDB (<ext-link ext-link-type="uri" xlink:href="https://www.gsea-msigdb.org">https://www.gsea-msigdb.org</ext-link>) (Liberzon et al., <xref ref-type="bibr" rid="B29">2011</xref>). The KEGG pathway database is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction, and relation networks for metabolism, genetic information processing, environmental information processing, cellular processes, organismal systems, human diseases, and drug development. MSigDB provides gene sets of canonical KEGG pathways derived from the KEGG pathway database. This data set contains 5,267 unique genes.</p>
<p>The overall workflow of this study comprises four stages. Data preparation: We prepare the disease-gene associations, PPI network, and pathway data. Analysis of breast cancer-related genes: We conduct two types of analysis for disease-related genes (network and enrichment). Prediction of breast cancer-related genes: We evaluate the prediction performance based on the PPI network and PPI &#x00026; KEGG pathway, and then we prioritize the candidate genes related to breast cancer by using all known disease-related genes as a training set. Analysis of candidate genes for breast cancer: We conduct three types of analysis for the candidate genes related to breast cancer (enrichment analysis of GO and KEGG as well as literature validation).</p></sec></sec>
<sec>
<title>Statistics of Breast Cancer&#x02013;Related Genes</title>
<sec>
<title>Network Analysis</title>
<p>First, we extract the disease-gene subnetwork related to a specific disease by retaining genes related to this disease and removing all other genes from the PPI network. We calculate six statistical measures of the network to evaluate the disease-gene subnetwork: (a) the number of genes; (b) the number of edges; (c) the average degree of nodes; (d) the clustering coefficient in the subnetwork; (e) the link density, which is defined as the ratio of the number of existing interactions to the maximum possible number of edges; and (f) a <italic>p</italic>-value evaluating the significance of interaction enrichment in the subnetwork.</p>
<p>Then, we analyze the distribution of breast cancer&#x02013;related genes in KEGG pathways (e.g., gastric cancer, cellular senescence, human T cell leukemia virus 1 infection, breast cancer, melanoma) by calculating (a) the number of common genes between the pathway and the breast cancer&#x02013;related gene set; (b) the number of genes in the KEGG pathway; (c) the number of edges in the subnetwork of the KEGG pathway; (d) the average degree of nodes; (e) the clustering coefficient in the subnetwork; (f) the link density, which is defined as the ratio of the number of existing interactions to the maximum possible number of edges; and (g) a <italic>p</italic>-value indicating the significance of gene enrichment in the KEGG pathway.</p>
<p>To demonstrate the higher connectivity of the related subnetworks, we compare these statistical quantities to those of random subsets of genes mapped on the PPI network with the same number of genes and same degree distribution.</p></sec>
<sec>
<title>Enrichment Analysis</title>
<p>Enrichment analysis is a widely used approach to identify biological themes. We analyze the enrichment of the gene set in GO and the pathway. <italic>P</italic>-values using the hypergeometric distribution are defined as</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mrow><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02212;</mml:mo><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover><mml:mrow><mml:mfrac><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mi>M</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>i</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>M</mml:mi></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mi>n</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mi>N</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>n</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<p>where <italic>N</italic> is the total number of genes in the background distribution, <italic>M</italic> is the number of genes with given annotations in that distribution, <italic>n</italic> is the size of the list of genes of interest, and <italic>k</italic> is the number of genes with the annotations in this list. <italic>P</italic>-values are adjusted for multiple comparisons, and <italic>q</italic>-values are also calculated for FDR control.</p>
<p>The clusterProfiler package was used to perform the enrichment analysis for GO terms and KEGG pathways (Yu et al., <xref ref-type="bibr" rid="B46">2012</xref>). As such, the background genes are dependent on the databases used by this package. This package depends on the Bioconductor annotation data GO.db and KEGG.db to obtain the maps of the entire GO and KEGG corpus. It provides functions, enrichGO and enrichKEGG, to perform the enrichment test for GO terms and KEGG pathways based on hypergeometric distribution. According to the description of clusterProfiler, the background genes should be all genes within a given annotation file, e.g., the GO annotation file. However, the version of the specific annotation file is dependent on the clusterProfiler package.</p></sec></sec>
<sec>
<title>Improved Algorithm for Predicting Breast Cancer-Related Genes</title>
<p>As shown in <bold>Figure 2</bold>, breast cancer&#x02013;related genes tend to be connected with each other in the PPI network. As such, the network-based algorithms can often provide useful insight to infer breast cancer&#x02013;related (candidate) genes. In this case, the PPI network is critical. Despite the rapid development of biotechnologies, there is still a large amount of data noise in the existing PPI network. Therefore, we propose an improved algorithm (RCRWR), which consists of network reconstruction based on local random walk dynamics and random walk with restart (see Algorithm 1 for the workflow of RCRWR). We try to use local random walks to extract the feature vectors of nodes (i.e., genes or proteins) and then use the feature vectors to calculate the similarity between nodes and reconstruct the PPI network to reduce the impact of data noise so as to improve the ability of disease-gene prediction based on the PPI network. Furthermore, we use KEGG pathways to enhance the ability to predict disease-related genes because the connections in the KEGG pathways tend to be stronger and more reliable than others.</p>
<table-wrap position="float">
<caption><p>Algorithm 1 RCRWR Algorithm.</p></caption>
<table frame="hsides" rules="groups">
<tbody>
<tr>
<td valign="top" align="left"><bold>Input:</bold> PPIs, known disease genes, and number (<italic>k</italic>) of nearest neighbors.</td>
</tr>
<tr>					
<td valign="top" align="left"><bold>Output:</bold> Probability scores.</td></tr>
<tr>
<td valign="top" align="left">1: Calculate behavior vectors (i.e., feature vectors) of all nodes by local random walk dynamics in the PPI network.</td></tr>
<tr>
<td valign="top" align="left">2: Calculate similarity scores between all nodes by the behavior vectors.</td></tr>
<tr>
<td valign="top" align="left">3: Generate a reconstructed PPI network by only retaining similarity scores between each node <italic>i</italic> (&#x0003D; 1&#x0007E;<italic>n</italic>) and its <italic>k</italic>-nearest neighbors.</td></tr>
<tr>
<td valign="top" align="left">4: Calculate probability scores of all nodes by applying network propagation based on random walk with restart to the reconstructed network, where known disease genes are used as seed nodes.</td>
</tr>
</tbody>
</table>
</table-wrap><sec>
<title>Network Reconstruction Based on Local Random Walks</title>
<sec>
<title>Similarity Measure Based on Local Random Walk Dynamics</title>
<p>Generally, similar behavior patterns appear when the dynamic processes are triggered on similar nodes. Therefore, we applied the local random walk dynamics to infer the similarity measure between nodes (Lai et al., <xref ref-type="bibr" rid="B27">2010</xref>; Xiang et al., <xref ref-type="bibr" rid="B44">2016</xref>). The probability of a walker moving from one node to others in a <italic>k</italic>-step random walk is determined by the probability matrix <italic>P</italic><sup><italic>k</italic></sup> (<italic>k</italic> is the random walk length, determining the range of the local structure that will be explored). Due to the small-world effect, good results can generally be generated by using a small <italic>k</italic>-value (<italic>k</italic> = 2, 3, &#x02026;). The element <italic>P</italic><sub><italic>ij</italic></sub> of the transition matrix <italic>P</italic> is the ratio between the weight of link (<italic>i, j</italic>) and the weighted degree of vertex <italic>i</italic>, i.e., <italic>P</italic><sub><italic>ij</italic></sub> &#x0003D; <italic>w</italic><sub><italic>ij</italic></sub>/&#x02211;<sub><italic>l</italic></sub><italic>w</italic><sub><italic>il</italic></sub>, where <italic>w</italic><sub><italic>ij</italic></sub> is the weight of edge (<italic>i, j</italic>). 
The behaviors of the random walk dynamics from a node <italic>i</italic> can be quantified by an <italic>n</italic>-dimensional vector <italic>v</italic><sub><italic>i</italic></sub> (<italic>i</italic> = 1&#x0007E;<italic>n</italic>; <italic>n</italic> is the number of nodes in a network), which is defined as the <italic>i</italic>-th row of the matrix <inline-formula><mml:math id="M2"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>&#x003C4;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003C4;</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>. Here, all random walks whose steps vary from 1 to <italic>k</italic> are taken into consideration to reinforce the contributions from the nodes near the target nodes. The similarity measure between nodes based on the local random walk dynamics can be calculated as</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="right center left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msqrt><mml:msqrt><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where (<italic>v</italic><sub><italic>i</italic></sub>, <italic>v</italic><sub><italic>j</italic></sub>) denotes the inner product of the behavior vectors <italic>v</italic><sub><italic>i</italic></sub> and <italic>v</italic><sub><italic>j</italic></sub>. If the behavior vectors <italic>v</italic><sub><italic>i</italic></sub> and <italic>v</italic><sub><italic>j</italic></sub> are highly consistent, then <inline-formula><mml:math id="M4"><mml:msubsup><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x02192;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>; otherwise, <inline-formula><mml:math id="M5"><mml:msubsup><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x02192;</mml:mo><mml:mn>0</mml:mn></mml:math></inline-formula>.</p></sec>
<sec>
<title>Network Reconstruction</title>
<p>We denote an undirected and weighted network by <italic>G</italic> = (<italic>V, E, W</italic>), where <italic>V</italic> is a set of proteins, <italic>E</italic> is a set of interactions, and <italic>W</italic> is a set of confidence scores of interactions in the original network. By using the above similarity measure based on local random walk dynamics (Equation 2), we calculate the similarity scores between all nodes in the original PPI network and obtain a similarity matrix <italic>S</italic>, where <italic>S</italic><sub><italic>ij</italic></sub> records the similarity score between nodes <italic>i</italic> and <italic>j</italic>. Then, we use the similarity scores to reconstruct the PPI network by retaining only the connections/similarity scores between each node <italic>i</italic> and its <italic>k</italic>-nearest neighbors (that is, its <italic>k</italic> neighbors with the highest similarity scores to the node <italic>i</italic>). The mathematical description of the reconstruction process is as follows.</p>
<p><bold>Definition 1</bold>. For each node <italic>i</italic>, all nodes are sorted in descending order of their similarity scores to node <italic>i</italic>. Based on this order, we define a ranking index vector, <inline-formula><mml:math id="M6"><mml:msubsup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x025AA;</mml:mo><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>|</mml:mo><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>, to record the ranking indices of all nodes with respect to node <italic>i</italic> (note that node <italic>i</italic> itself is given the largest ranking index), where <italic>R</italic><sub><italic>j, i</italic></sub> records the ranking index of node <italic>j</italic> in this case, and <italic>n</italic> is the number of nodes in the network.</p>
<p><bold>Definition 2</bold>. By combining the ranking vectors about all nodes, we define a ranking matrix <inline-formula><mml:math id="M7"><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x025AA;</mml:mo><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x025AA;</mml:mo><mml:mo>,</mml:mo><mml:mn>2</mml:mn></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x025AA;</mml:mo><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, where <italic>n</italic> is the number of nodes in the network.</p>
<p><bold>Definition 3</bold>. By using the ranking matrix and the similarity matrix <italic>S</italic>, we define a reconstructed and undirected network <inline-formula><mml:math id="M8"><mml:mi>&#x0011C;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>,</mml:mo><mml:mi>&#x000CA;</mml:mi><mml:mo>,</mml:mo><mml:mi>&#x00174;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="M9"><mml:mover accent="true"><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mi>V</mml:mi></mml:math></inline-formula>, &#x000CA; and &#x00174; denote the set of edges and the set of weights of edges in the reconstructed network, respectively:</p>
<disp-formula id="E3"><mml:math id="M10"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x000CA;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x0007E;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>n</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x0007E;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>n</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x02264;</mml:mo><mml:mi>k</mml:mi><mml:mtext>&#x000A0;</mml:mtext></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>&#x00174;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x0007E;</mml:mo><mml:mi>n</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x0007E;</mml:mo><mml:mi>n</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02264;</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>S</italic><sub><italic>j, i</italic></sub> &#x0003D; <italic>S</italic><sub><italic>i, j</italic></sub>, and <italic>k</italic> denotes the number of nearest neighbors (<italic>k</italic> = 50 by default).</p>
<p>In the reconstruction process for a given <italic>k</italic>-value, the newly added edges can be denoted by <inline-formula><mml:math id="M11"><mml:msubsup><mml:mrow><mml:mi>&#x000CA;</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>d</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x0007E;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>n</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x0007E;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>n</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msubsup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x02264;</mml:mo><mml:mi>k</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02209;</mml:mo><mml:mi>E</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>; the removed edges can be denoted by <inline-formula><mml:math id="M12"><mml:msubsup><mml:mrow><mml:mi>&#x000CA;</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>m</mml:mi><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>E</mml:mi><mml:mo>\</mml:mo><mml:mi>&#x000CA;</mml:mi></mml:math></inline-formula>; the retained edges can be denoted by <inline-formula><mml:math 
id="M13"><mml:msubsup><mml:mrow><mml:mi>&#x000CA;</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>E</mml:mi><mml:mo>&#x022C2;</mml:mo><mml:mi>&#x000CA;</mml:mi></mml:math></inline-formula>; and the weights of the retained edges are substituted by the similarity scores obtained by the similarity measure based on local random walk dynamics.</p>
<p>By using the reconstruction process, we can generate a reconstructed and undirected network. The reconstructed network may enhance our ability for disease-gene prediction because it can improve the original PPI network. To show the effect of the reconstruction process on the PPI network, we have generated a set of reconstructed PPI networks by using a series of <italic>k</italic>-values, and then we calculate the mean score (in the String database) of retained edges <inline-formula><mml:math id="M14"><mml:msubsup><mml:mrow><mml:mi>&#x000CA;</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> and removed edges <inline-formula><mml:math id="M15"><mml:msubsup><mml:mrow><mml:mi>&#x000CA;</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>m</mml:mi><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi></mml:mrow><mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> for each <italic>k</italic> value. The results show that the mean score (in the String database) of the retained edges tends to be larger than that of the removed edges (see <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1</xref>). This is consistent with our expectation: By using the reconstruction process, PPIs with high reliability in the String database tend to be retained, and PPIs with low reliability in the String database tend to be removed, and the reconstruction process also supplements some edges with high similarity scores that do not exist in the original PPI network. 
Moreover, we have provided an example figure to compare the original network with the reconstructed one, which shows the effect of network reconstruction on the original network, so the reader can more clearly see what is being done (see <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 2</xref>).</p>
<p>As a whole, this reconstruction process may reduce data noise to a certain extent to optimize the PPI network so as to improve the network data environment for disease-gene prediction. In the following step, we apply network propagation to the reconstructed network to predict disease-related genes more effectively.</p></sec></sec>
<sec>
<title>Network Propagation Based on Random Walk With Restart</title>
<p>The random walk with restart can be seen as performing multiple random walks over the PPI network, each starting from a seed node associated with a known disease gene and iteratively moving from one node to a random neighbor; the stationary distribution can be considered as a measure of the proximity between the seed(s) and all the other nodes in the network. More formally, the random walk with restart is defined as</p>
<disp-formula id="E4"><label>(3)</label><mml:math id="M16"><mml:mtable class="eqnarray" columnalign="right center left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mi>r</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>M</mml:mi><mml:msubsup><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:mi>r</mml:mi><mml:msubsup><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Here, <italic>p</italic><sub>0</sub> is the initial probability distribution. <italic>M</italic> is the column-normalized adjacency matrix of the graph. <italic>r</italic>&#x02208;(0, 1) is the restart probability, and it is set to 0.7 as suggested by previous studies (Zhao et al., <xref ref-type="bibr" rid="B48">2015</xref>). <italic>p</italic><sub><italic>t</italic></sub> is the probability vector of the random walker reaching all nodes at the end of the <italic>t</italic>th step. After several iterations, the difference between the vectors <italic>p</italic><sub><italic>t</italic>&#x0002B;1</sub> and <italic>p</italic><sub><italic>t</italic></sub> becomes negligible, the stationary probability distribution is reached, and each element of the vector represents a proximity measure between the corresponding graph node and the seed(s). In this work, iterations are repeated until the difference between <italic>p</italic><sub><italic>t</italic></sub> and <italic>p</italic><sub><italic>t</italic>&#x0002B;1</sub> falls below 10<sup>-6</sup> as used by previous studies (Zhao et al., <xref ref-type="bibr" rid="B48">2015</xref>).</p>
<p>Note that for cross-validation, the known disease-related genes in the training set are used as seed nodes to conduct the random walk with restart, and all known disease-related genes are used as seed nodes when predicting novel candidate genes.</p></sec>
<sec>
<title>Prediction Based on PPI Network</title>
<p>We first prepare the PPI network. The PPI network from the String database retains edges with confidence scores &#x0003E;400, and we normalize the confidence scores to be between zero and one by dividing them by 1,000. The resulting PPI network is used as the original PPI network. We use a weighted graph <italic>G</italic> = (<italic>V, E, W</italic>) to denote the PPI network comprising a set of proteins <italic>V</italic>, a set of interactions <italic>E</italic>, and a set of confidence scores <italic>W</italic>. Then, we map known breast cancer&#x02013;related genes into the PPI network and conduct the random walk with restart to predict disease-related genes. Finally, the probabilities of nodes are used to rank candidate genes.</p></sec>
<sec>
<title>Prediction Based on PPI Network and KEGG Pathway</title>
<p>Similarly, we prepare the related data sets, including the PPI network, breast cancer&#x02013;related genes, and KEGG pathways. The PPI network still retains edges with confidence scores &#x0003E;400. We map known breast cancer&#x02013;related genes to the PPI network. Then, KEGG pathways are mapped into the PPI network and intersected with the above network. Finally, we perform the random walk with restart to predict breast cancer&#x02013;related genes.</p></sec>
<sec>
<title>Performance Evaluation</title>
<p>To evaluate the prediction performance of the algorithm, we apply traditional 3-fold cross-validation in the benchmark. Each time, the known disease genes are randomly split into three parts. Each part is, in turn, used as the test set and the rest as the training set. Then, we use the genes in the training set as seeds to perform the random walk with restart to predict disease-related genes. Note that, in the process of predicting disease genes, only genes in the training set are used as seed genes. For the cross-validation, the training set is made up of two thirds of all disease genes, randomly selected. For the prediction of novel genes, all known disease genes are used as the training set.</p>
<p>For a disease <italic>d</italic> in the disease set <italic>D</italic>, <italic>T</italic><sub><italic>d</italic></sub> denotes the set of genes in the test set. The disease-gene prediction algorithm provides a ranking list of candidate genes for disease <italic>d</italic>. We denote by <italic>R</italic><sub><italic>d</italic></sub>(<italic>k</italic>) the set of top <italic>k</italic> candidate genes in the ranking list. Then, recall in the top <italic>k</italic> ranking list is defined as</p>
<disp-formula id="E5"><label>(4)</label><mml:math id="M17"><mml:mtable class="eqnarray" columnalign="right center left"><mml:mtr><mml:mtd><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02229;</mml:mo><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>This metric is used to evaluate the performance of prediction algorithms.</p></sec></sec></sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<p>Here, we first conduct two types of analysis for breast cancer&#x02013;related genes: (1) network analysis of the breast cancer&#x02013;related subnetwork and KEGG pathways and (2) enrichment analysis of GO and the pathway of breast cancer&#x02013;related genes. Then, we predict breast cancer&#x02013;related genes on the (reconstructed) PPI network with and without the KEGG pathways and analyze the prediction performance, including (1) quantitative evaluation on the known breast cancer&#x02013;related gene set, (2) enrichment analysis of GO and the pathway of candidate genes, and (3) a literature validation of candidate genes. <xref ref-type="fig" rid="F1">Figure 1</xref> shows the workflow.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Workflow of the work.</p></caption>
<graphic xlink:href="fgene-12-596794-g0001.tif"/>
</fig>
<sec>
<title>Analysis of Breast Cancer-Related Genes</title>
<sec>
<title>Network Analysis</title>
<sec>
<title>Subnetwork of Breast Cancer-Related Genes</title>
<p>Breast cancer&#x02013;related genes were obtained from Yang et al. (<xref ref-type="bibr" rid="B45">2016</xref>). After mapping breast cancer&#x02013;related genes into the PPI network, there are only 130 breast cancer&#x02013;related genes. We first analyze the distribution of breast cancer&#x02013;related genes in the PPI network as well as KEGG pathways (<xref ref-type="fig" rid="F2">Figure 2</xref>). <xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 3</xref>&#x02013;<xref ref-type="supplementary-material" rid="SM1">7</xref> provide larger plots so that gene names can be identified more easily. <xref ref-type="fig" rid="F2">Figure 2A</xref> displays the subnetwork of breast cancer&#x02013;related genes. The subnetwork is extracted from the PPI network by only retaining breast cancer&#x02013;related genes. We quantitatively analyze the breast cancer&#x02013;related subnetwork by calculating six statistical measures of networks (see <xref ref-type="table" rid="T1">Table 1</xref>). We find that the breast cancer&#x02013;related subnetwork has a higher value of the clustering coefficient (CC) and higher link density compared with random sampling on the whole network, showing significantly more interactions than expected. These results quantitatively suggest that the breast cancer&#x02013;related genes/proteins tend to interact with each other, forming a disease module with higher link density than expected.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p><bold>(A)</bold> Subnetwork of breast cancer&#x02013;related genes extracted from the PPI network. <bold>(B&#x02013;F)</bold> Subnetworks extracted from the PPI network by using the sets of genes of five KEGG pathways, respectively: gastric cancer, cellular senescence, human T cell leukemia virus 1 infection, breast cancer, and melanoma. Note that green nodes with larger size denote known breast cancer&#x02013;related genes.</p></caption>
<graphic xlink:href="fgene-12-596794-g0002.tif"/>
</fig>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Statistics of disease-gene subnetworks related to breast cancer as well as other diseases.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Disease</bold></th>
<th valign="top" align="center"><bold>&#x00023;Genes</bold></th>
<th valign="top" align="center"><bold>&#x00023;Interactions</bold></th>
<th valign="top" align="center"><bold>Degree</bold></th>
<th valign="top" align="center"><bold>CC</bold></th>
<th valign="top" align="center"><bold>Link density</bold></th>
<th valign="top" align="center"><bold><italic>p-</italic>value</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Breast cancer</td>
<td valign="top" align="center">130</td>
<td valign="top" align="center">477 (232 &#x000B1; 24)</td>
<td valign="top" align="center">7.3</td>
<td valign="top" align="center">0.55 (0.42 &#x000B1; 0.04)</td>
<td valign="top" align="center">5.7% (2.8% &#x000B1; 0.3%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">Rheumatoid arthritis</td>
<td valign="top" align="center">115</td>
<td valign="top" align="center">607 (87 &#x000B1; 15)</td>
<td valign="top" align="center">10.6</td>
<td valign="top" align="center">0.45 (0.36 &#x000B1; 0.04)</td>
<td valign="top" align="center">9.3% (1.3% &#x000B1; 0.2%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">Cholesterol</td>
<td valign="top" align="center">221</td>
<td valign="top" align="center">1,152 (245 &#x000B1; 27)</td>
<td valign="top" align="center">10.4</td>
<td valign="top" align="center">0.47 (0.37 &#x000B1; 0.03)</td>
<td valign="top" align="center">4.7% (1.0% &#x000B1; 0.1%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">Obesity</td>
<td valign="top" align="center">102</td>
<td valign="top" align="center">764 (65 &#x000B1; 14)</td>
<td valign="top" align="center">15.0</td>
<td valign="top" align="center">0.62 (0.35 &#x000B1; 0.05)</td>
<td valign="top" align="center">14.8% (1.3% &#x000B1; 0.3%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">Hypertension</td>
<td valign="top" align="center">104</td>
<td valign="top" align="center">234 (64 &#x000B1; 9)</td>
<td valign="top" align="center">4.5</td>
<td valign="top" align="center">0.44 (0.35 &#x000B1; 0.05)</td>
<td valign="top" align="center">4.4% (1.2% &#x000B1; 0.2%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">Metabolic traits</td>
<td valign="top" align="center">135</td>
<td valign="top" align="center">439 (70 &#x000B1; 10)</td>
<td valign="top" align="center">6.5</td>
<td valign="top" align="center">0.38 (0.34 &#x000B1; 0.04)</td>
<td valign="top" align="center">4.9% (0.8% &#x000B1; 0.1%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">Crohn&#x00027;s disease</td>
<td valign="top" align="center">194</td>
<td valign="top" align="center">847 (198 &#x000B1; 27)</td>
<td valign="top" align="center">8.7</td>
<td valign="top" align="center">0.50 (0.38 &#x000B1; 0.04)</td>
<td valign="top" align="center">4.5% (1.1% &#x000B1; 0.1%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">Inflammatory bowel disease</td>
<td valign="top" align="center">220</td>
<td valign="top" align="center">1,653 (251 &#x000B1; 32)</td>
<td valign="top" align="center">15.0</td>
<td valign="top" align="center">0.52 (0.38 &#x000B1; 0.03)</td>
<td valign="top" align="center">6.9% (1.1% &#x000B1; 0.1%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">Metabolite levels</td>
<td valign="top" align="center">95</td>
<td valign="top" align="center">366 (44 &#x000B1; 10)</td>
<td valign="top" align="center">7.7</td>
<td valign="top" align="center">0.50 (0.34 &#x000B1; 0.05)</td>
<td valign="top" align="center">8.2% (1.0% &#x000B1; 0.2%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">Prostate cancer</td>
<td valign="top" align="center">238</td>
<td valign="top" align="center">589 (300 &#x000B1; 24)</td>
<td valign="top" align="center">5.0</td>
<td valign="top" align="center">0.44 (0.39 &#x000B1; 0.03)</td>
<td valign="top" align="center">2.1% (1.1% &#x000B1; 0.1%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>Disease-gene subnetworks are extracted from the PPI network by retaining genes related to a specific disease, e.g., breast cancer. &#x00023;Genes and &#x00023;Interactions denote the number of genes and edges in the subnetworks, respectively. Degree and CC denote the average degree and the average CC of all nodes in the subnetwork, respectively. Link density is defined as the ratio of the number of existing interactions to the maximum number of possible edges in the subnetwork. p-value evaluates the significance of interaction enrichment in the subnetwork. &#x0201C;(x &#x000B1; y)&#x0201D; denotes the mean and standard deviation of statistics in random sampling</italic>.</p>
</table-wrap-foot>
</table-wrap>
<p>As we know, in PPI networks, proteins with similar functions tend to connect or interact with each other. The occurrence and development of disease is usually due to the abnormal function of related genes or proteins, which leads to the change of related signal pathways. These proteins usually have functional similarity or correlation. Therefore, genes of the same disease or similar diseases tend to connect with each other in the PPI network to form disease modules.</p>
<p>We calculate the six statistical measures for subnetworks of other diseases, such as rheumatoid arthritis, cholesterol, and obesity (see <xref ref-type="table" rid="T1">Table 1</xref>). Similar conclusions can be obtained for other diseases. Clearly, these diseases also have similar modular property. This again confirms the modular property of disease-related genes (Ghiassian et al., <xref ref-type="bibr" rid="B16">2015</xref>; Xiang et al., <xref ref-type="bibr" rid="B44">2016</xref>; Chen et al., <xref ref-type="bibr" rid="B7">2018</xref>; Hu et al., <xref ref-type="bibr" rid="B22">2018</xref>, <xref ref-type="bibr" rid="B23">2020</xref>; Choobdar et al., <xref ref-type="bibr" rid="B8">2019</xref>; Dwivedi et al., <xref ref-type="bibr" rid="B14">2020</xref>). This is why guilt by association can become a useful strategy in disease-gene prediction based on PPI networks.</p></sec>
<sec>
<title>Subnetworks of KEGG Pathways Related to Breast Cancer</title>
<p>Moreover, we study subnetworks of KEGG pathways related to breast cancer. We analyze the distribution of breast cancer&#x02013;related genes in KEGG pathways (also, see <xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 3</xref>&#x02013;<xref ref-type="supplementary-material" rid="SM1">7</xref>).</p>
<p>We extract the subnetworks of the KEGG pathways from the PPI network by using the sets of genes of the KEGG pathways and calculate the statistical measures of networks for these subnetworks. <xref ref-type="table" rid="T2">Table 2</xref> lists five KEGG pathways significantly related to breast cancer along with the statistical measures of the subnetworks. The results show that these subnetworks have similarly higher values of CC and higher link density than the whole network, and they have significantly more interactions than expected (<italic>p</italic> &#x0003C;1.0e-16). This means the genes/proteins in these KEGG pathways also tend to interact with each other, forming modules with higher link density than expected.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Statistics of KEGG pathways related to breast cancer.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Pathway ID</bold></th>
<th valign="top" align="left"><bold>Pathway Name</bold></th>
<th valign="top" align="center"><bold>&#x00023;Matched Genes</bold></th>
<th valign="top" align="center"><bold>&#x00023;Genes</bold></th>
<th valign="top" align="center"><bold>&#x00023;Interactions</bold></th>
<th valign="top" align="center"><bold>Degree</bold></th>
<th valign="top" align="center"><bold>CC</bold></th>
<th valign="top" align="center"><bold>Link density</bold></th>
<th valign="top" align="center"><bold><italic>p</italic>-value</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">hsa04218</td>
<td valign="top" align="left">Cellular senescence</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">156</td>
<td valign="top" align="center">2,377 (1,136 &#x000B1; 69)</td>
<td valign="top" align="center">30.5</td>
<td valign="top" align="center">0.65(0.52 &#x000B1; 0.02)</td>
<td valign="top" align="center">19.7% (9.6% &#x000B1; 0.6%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">hsa05224</td>
<td valign="top" align="left">Breast cancer</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">147</td>
<td valign="top" align="center">3,169 (1,059 &#x000B1; 79)</td>
<td valign="top" align="center">43.1</td>
<td valign="top" align="center">0.72(0.57 &#x000B1; 0.03)</td>
<td valign="top" align="center">29.5% (9.9% &#x000B1; 0.7%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
<tr>
<td valign="top" align="left">hsa05226</td>
<td valign="top" align="left">Gastric cancer</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">149</td>
<td valign="top" align="center">3,042 (953 &#x000B1; 74)</td>
<td valign="top" align="center">40.8</td>
<td valign="top" align="center">0.71(0.55 &#x000B1; 0.03)</td>
<td valign="top" align="center">27.6% (9.0% &#x000B1; 0.7%)</td>
<td valign="top" align="center">&#x0003C;1.0e-18</td>
</tr>
<tr>
<td valign="top" align="left">hsa05166</td>
<td valign="top" align="left">Human T-cell leukemia virus 1 infection</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">217</td>
<td valign="top" align="center">3,872 (1,516 &#x000B1; 96)</td>
<td valign="top" align="center">35.7</td>
<td valign="top" align="center">0.63(0.49 &#x000B1; 0.02)</td>
<td valign="top" align="center">16.5% (6.6% &#x000B1; 0.4%)</td>
<td valign="top" align="center">&#x0003C;1.0e-17</td>
</tr>
<tr>
<td valign="top" align="left">hsa05218</td>
<td valign="top" align="left">Melanoma</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">72</td>
<td valign="top" align="center">1,112 (385 &#x000B1; 39)</td>
<td valign="top" align="center">30.9</td>
<td valign="top" align="center">0.77(0.61 &#x000B1; 0.04)</td>
<td valign="top" align="center">43.5% (15.1% &#x000B1; 1.5%)</td>
<td valign="top" align="center">&#x0003C;1.0e-16</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>The KEGG pathways used in analysis are selected based on the number of matched genes between the pathways and known disease gene set.</italic></p>
<p><italic>&#x00023;Matched Genes denotes the number of common genes between the gene set of the pathway and the breast cancer&#x02013;related gene set; &#x00023;Genes denotes the number of genes in the pathway; &#x00023;Interactions denotes the number of interactions in the PPI subnetwork consisting of the genes in the pathway; Degree and CC denote the average degree and the average CC of all nodes in the subnetwork, respectively. Link density is defined as the ratio of the number of existing interactions to the maximum number of possible edges in the subnetwork. p-value evaluates the significance of interaction enrichment in the subnetwork. &#x0201C;(x &#x000B1; y)&#x0201D; denotes the mean and standard deviation of statistics in random sampling</italic>.</p>
</table-wrap-foot>
</table-wrap>
<p>The values of CC and link density for most KEGG pathways are higher than those of the above subnetwork of breast cancer&#x02013;related genes (see <xref ref-type="table" rid="T1">Tables 1</xref>, <xref ref-type="table" rid="T2">2</xref>). This means the genes in the KEGG pathways are more modular than breast cancer&#x02013;related genes. The reason may be that genes in these KEGG pathways are more closely related than other genes in functions. Moreover, we can find that there exist submodule structures in the subnetworks of the KEGG pathways (see <xref ref-type="fig" rid="F2">Figures 2B&#x02013;F</xref>). This means that there exist functional subunits in the KEGG pathways.</p>
<p>We label known breast cancer&#x02013;related genes in the subnetworks of the KEGG pathways. Other unlabeled genes in the KEGG pathways are also likely to be related to breast cancer because they are likely to jointly affect breast cancer&#x02013;related functions. One can see that some subunits have more breast cancer&#x02013;related genes. This means that the known breast cancer&#x02013;related genes may be non-randomly distributed in the subnetworks of KEGG pathways, and some subunits in the KEGG pathways may be more related to breast cancer.</p>
<p>Overall, the physical and functional connections between genes in the KEGG pathways are stronger and more reliable than others. Therefore, we make use of information of KEGG pathways in disease-gene prediction.</p></sec></sec>
<sec>
<title>Enrichment Analysis</title>
<p>To analyze the relatedness of disease-gene sets to functional units, we perform GO enrichment analysis and KEGG pathway enrichment analysis. <xref ref-type="fig" rid="F3">Figure 3</xref> shows the results of GO enrichment analysis and KEGG pathway enrichment analysis (obtained by clusterProfiler; Yu et al., <xref ref-type="bibr" rid="B46">2012</xref>).</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Enrichment analysis of known breast cancer&#x02013;related genes: <bold>(A)</bold> GO enrichment analysis and <bold>(B)</bold> pathway enrichment analysis.</p></caption>
<graphic xlink:href="fgene-12-596794-g0003.tif"/>
</fig>
<p>According to the GO terms in <xref ref-type="fig" rid="F3">Figure 3</xref>, breast cancer&#x02013;related genes are enriched in the following GO terms, e.g., &#x0201C;double-strand break repair,&#x0201D; &#x0201C;replicative senescence,&#x0201D; &#x0201C;cell aging,&#x0201D; &#x0201C;aging,&#x0201D; &#x0201C;cell cycle checkpoint,&#x0201D; &#x0201C;cell cycle arrest,&#x0201D; &#x0201C;gland development,&#x0201D; &#x0201C;signal transduction by p53 class mediator,&#x0201D; &#x0201C;mitotic cell cycle checkpoint,&#x0201D; and &#x0201C;protein kinase B signaling.&#x0201D;</p>
<p>According to the KEGG pathways in <xref ref-type="fig" rid="F3">Figure 3</xref>, breast cancer&#x02013;related genes are enriched in cancer-related KEGG pathways, e.g., gastric cancer, endometrial cancer, colorectal cancer, thyroid cancer, pancreatic cancer, prostate cancer, central carbon metabolism in cancer, proteoglycans in cancer, and bladder cancer.</p>
<p>BRCA gene mutations, which are commonly present in breast cancer, are associated with significantly increased susceptibility to tumors, including prostate, pancreatic, gallbladder/cholangioma, and stomach cancer as well as malignant melanoma. These tumors share a common pathogenic gene network in which the BRCA gene plays an important role as it is a member of the mismatch repair gene family. The prediction of breast cancer&#x02013;related genes can discover the interaction between tumors and enrich the relationship network, which is of great significance for finding therapeutic targets for tumors.</p></sec></sec>
<sec>
<title>Prediction of Breast Cancer Genes Based on PPI Network</title>
<p>To evaluate the prediction performance of our algorithm, we first apply RCRWR to the PPI network. The results show that RCRWR significantly outperforms the original RWR algorithm (Wu et al., <xref ref-type="bibr" rid="B42">2008</xref>) on the PPI network for the top 1, 5, and 10% lists of candidate genes (see <xref ref-type="fig" rid="F4">Figure 4</xref>). This means that the network reconstruction indeed can improve the PPI network so as to enhance the ability to predict breast cancer&#x02013;related genes. Moreover, it is clear that both RCRWR and RWR perform significantly better than the random case.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Top-<italic>k</italic> Recall (<italic>k</italic> = 1, 5, 10, 20%) of the original and improved algorithms in the PPI network.</p></caption>
<graphic xlink:href="fgene-12-596794-g0004.tif"/>
</fig></sec>
<sec>
<title>Prediction of Breast Cancer Genes Based on PPI Network and KEGG Pathway</title>
<p>Further, we intersect genes in the KEGG pathways with genes in the PPI to obtain a more reliable PPI network and then apply RCRWR to the PPI network. The results show that RCRWR is significantly better than the RWR algorithm on the PPI network for top 1, 5, 10, and 20% lists of candidate genes (see <xref ref-type="fig" rid="F5">Figure 5</xref>). This again proves that the network reconstruction can indeed enhance the ability to infer breast cancer&#x02013;related genes on the PPI network. Moreover, it is clear that the results of RCRWR and RWR are also significantly better than in the random case.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Top-<italic>k</italic> Recall (<italic>k</italic> = 1, 5, 10, 20%) of the original and improved algorithms in the PPI network with KEGG pathway (PPI_KEGG).</p></caption>
<graphic xlink:href="fgene-12-596794-g0005.tif"/>
</fig>
<p>A comparison of the results on the PPI network with and without KEGG pathway data (see <xref ref-type="fig" rid="F6">Figure 6</xref>) makes it very clear that the prediction performance of both RWR and RCRWR is enhanced by the addition of KEGG pathway information. The information of the KEGG pathway is very helpful for the prediction of disease-related genes.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Comparison of top-<italic>k</italic> Recall (<italic>k</italic> = 1, 5, 10, 20%) in the PPI network with and without KEGG pathway by the <bold>(A)</bold> original algorithm and <bold>(B)</bold> improved algorithm.</p></caption>
<graphic xlink:href="fgene-12-596794-g0006.tif"/>
</fig></sec>
<sec>
<title>Analysis of Candidate Genes of Breast Cancer</title>
<p>Here, we use all known breast cancer&#x02013;related genes as the training set to predict candidate genes. We map breast cancer&#x02013;related genes into the PPI network and map the KEGG pathway onto the PPI network because the KEGG pathway is helpful for disease-gene prediction. We perform our improved algorithm RCRWR in the network to score all candidate genes. Then, we generate a ranking list of candidate genes for breast cancer. The higher the ranking of genes, the more likely they are to be associated with breast cancer.</p>
<p>We list the top 10 predicted genes in <xref ref-type="table" rid="T3">Table 3</xref>, which are considered to be most closely associated with breast cancer according to the scores from the prediction algorithm. To check the effectiveness of prediction for the candidate genes, we search the literature and try to find the connections between these genes and breast cancer.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Predicted top 10 candidate genes for breast cancer using PPI and KEGG pathway.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Gene</bold></th>
<th valign="top" align="left"><bold>References</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>CDK4</italic></td>
<td valign="top" align="left">Ullah Shah et al., <xref ref-type="bibr" rid="B36">2015</xref></td>
</tr>
<tr>
<td valign="top" align="left"><italic>RAD51</italic></td>
<td valign="top" align="left">Gao et al., <xref ref-type="bibr" rid="B15">2011</xref>; Wong et al., <xref ref-type="bibr" rid="B41">2011</xref>; Wu et al., <xref ref-type="bibr" rid="B43">2015</xref>; Liang et al., <xref ref-type="bibr" rid="B28">2016</xref>; Bhattacharya et al., <xref ref-type="bibr" rid="B3">2017</xref></td>
</tr>
<tr>
<td valign="top" align="left"><italic>ATR</italic></td>
<td valign="top" align="left">Di Benedetto et al., <xref ref-type="bibr" rid="B12">2017</xref></td>
</tr>
<tr>
<td valign="top" align="left"><italic>TOP3A</italic></td>
<td valign="top" align="left">Broberg et al., <xref ref-type="bibr" rid="B5">2009</xref></td>
</tr>
<tr>
<td valign="top" align="left"><italic>BLM</italic></td>
<td valign="top" align="left">Ding et al., <xref ref-type="bibr" rid="B13">2009</xref></td>
</tr>
<tr>
<td valign="top" align="left"><italic>XRCC6</italic></td>
<td valign="top" align="left">Willems et al., <xref ref-type="bibr" rid="B40">2009</xref>; He et al., <xref ref-type="bibr" rid="B21">2012</xref></td>
</tr>
<tr>
<td valign="top" align="left"><italic>RAD52</italic></td>
<td valign="top" align="left">Huang et al., <xref ref-type="bibr" rid="B24">2016</xref></td>
</tr>
<tr>
<td valign="top" align="left"><italic>EXO1</italic></td>
<td valign="top" align="left">Wang et al., <xref ref-type="bibr" rid="B39">2009</xref></td>
</tr>
<tr>
<td valign="top" align="left"><italic>MRE11A</italic></td>
<td valign="top" align="left">Podralska et al., <xref ref-type="bibr" rid="B34">2018</xref></td>
</tr>
<tr>
<td valign="top" align="left"><italic>RAD54B</italic></td>
<td valign="top" align="left">Zhang et al., <xref ref-type="bibr" rid="B47">2019</xref></td>
</tr>
</tbody>
</table>
</table-wrap>
<p>DNA damage repair is an important cellular defense mechanism, and its dysfunction has been linked to a variety of diseases, including breast cancer. Most of the top 10 candidate genes for breast cancer are related to the DNA damage repair function. RAD51 is a eukaryotic protein that plays a role in DNA repair, neuronal development in the motor system, and innate immune response (Liang et al., <xref ref-type="bibr" rid="B28">2016</xref>). At present, studies on the RAD51 gene mainly focus on the interaction between tumor suppressors, the cell cycle, and apoptotic regulators to promote the transformation of normal breast epithelial cells into tumor cells (Bhattacharya et al., <xref ref-type="bibr" rid="B3">2017</xref>). Genetic association studies confirm that the RAD51 polymorphisms contribute to the susceptibility of breast cancer in multiple populations (Gao et al., <xref ref-type="bibr" rid="B15">2011</xref>; Wong et al., <xref ref-type="bibr" rid="B41">2011</xref>; Wu et al., <xref ref-type="bibr" rid="B43">2015</xref>). RAD52 and RAD54B are key homologous recombination repair (HRR) proteins, which are closely related to the annealing of homologous complementary sequences. RAD52 is shown to be associated with breast cancer susceptibility genes BRCA1 and BRCA2. When RAD52 is knocked out in BRCA1- or BRCA2-deficient tumor cells, HRR frequency is significantly reduced (Huang et al., <xref ref-type="bibr" rid="B24">2016</xref>). For RAD54B, Zhang et al. show that RAD54B protein expression in breast cancer tissues was higher than that in adjacent normal tissues through bioinformatics analysis of multiple relevant databases and experiments related to immunohistochemistry and breast cancer cell lines (Zhang et al., <xref ref-type="bibr" rid="B47">2019</xref>). In addition, the X-ray repair cross-complementing 6 (XRCC6) protein is also a key molecule in the non-homologous end-joining (NHEJ) repair pathway (Bau et al., <xref ref-type="bibr" rid="B2">2011</xref>).
Studies show that the XRCC6 polymorphism is correlated with the occurrence and development of breast cancer (Willems et al., <xref ref-type="bibr" rid="B40">2009</xref>; He et al., <xref ref-type="bibr" rid="B21">2012</xref>). Ataxia-telangiectasia mutated and Rad3-related protein (ATR) is an important regulator of the response mechanism of DNA damage repair. The ATR molecular pathway regulates cell DNA damage repair through a variety of cytokines, thus leading to the development of normal cells into tumor cells. High ATR expression was found to be associated with late breast cancer stage and poor prognosis (Di Benedetto et al., <xref ref-type="bibr" rid="B12">2017</xref>). Furthermore, Exonuclease 1 (EXO1), a kind of multifunctional enzyme, is mainly used in clearing double-stranded DNA or RNA molecules that exist in the single sequence. Wang et al. report that the A allele of EXO1 K589E conferred a significantly increased risk of breast cancer (Wang et al., <xref ref-type="bibr" rid="B39">2009</xref>). Apart from the above genes, CDK4 (Ullah Shah et al., <xref ref-type="bibr" rid="B36">2015</xref>), MRE11A (Podralska et al., <xref ref-type="bibr" rid="B34">2018</xref>), BLM (Ding et al., <xref ref-type="bibr" rid="B13">2009</xref>), and TOP3A (Broberg et al., <xref ref-type="bibr" rid="B5">2009</xref>) have also been shown to be associated with the pathogenesis of breast cancer. These results show that our predictions are consistent with existing reports, and the algorithm is valuable for predicting new disease-gene associations.</p>
<p>To further evaluate our predictions, we perform GO and KEGG pathway enrichment analysis on the top 10 ranked genes. The results of GO enrichment analysis show that the genes are mostly enriched in DNA recombination in its biological process, PML body in its cellular component and catalytic activity, acting on DNA in its molecular function (<xref ref-type="fig" rid="F7">Figure 7A</xref>). GO analysis shows that these genes are involved in DNA damage repair and cell growth and transformation, which are important in the pathogenesis of cancers. According to the KEGG pathways listed in <xref ref-type="fig" rid="F7">Figure 7B</xref>, the top 10 candidate genes are enriched in cell division and growth pathways, including homologous recombination, NHEJ, and cell cycle pathways, which are shown to play important roles in the division and growth of cancer cells.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Enrichment analysis of top 10 candidate genes for breast cancer: <bold>(A)</bold> GO enrichment analysis and <bold>(B)</bold> pathway enrichment analysis.</p></caption>
<graphic xlink:href="fgene-12-596794-g0007.tif"/>
</fig></sec>
<sec>
<title>Application to Other Diseases</title>
<p>Moreover, we apply the above RCRWR algorithm to other diseases, such as inflammatory bowel disease, metabolite levels, and cholesterol. To display the prediction performance in the diseases, we still apply 3-fold cross-validation to the diseases. <xref ref-type="fig" rid="F8">Figure 8</xref> shows average top-<italic>k</italic> Recall prediction performance for all diseases in the data set. The results show that RCRWR outperforms the original algorithm on the whole. As examples, <xref ref-type="fig" rid="F9">Figure 9</xref> shows the top 1% Recall prediction performance for some diseases. The results show that RCRWR can improve the ability of predicting disease-related genes for most diseases such as inflammatory bowel disease and rheumatoid arthritis.</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>Top-<italic>k</italic> Recall performance for all diseases in the PPI network.</p></caption>
<graphic xlink:href="fgene-12-596794-g0008.tif"/>
</fig>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Top 5% Recall for different diseases in the PPI network with KEGG pathway.</p></caption>
<graphic xlink:href="fgene-12-596794-g0009.tif"/>
</fig></sec></sec>
<sec sec-type="conclusions" id="s4">
<title>Conclusion</title>
<p>In this study, we have conducted analysis and prediction of breast cancer&#x02013;related genes based on the PPI network and KEGG pathway. First, we analyzed the distribution of breast cancer&#x02013;related genes from the aspects of network and enrichment analysis. The results show that the subnetwork of breast cancer&#x02013;related genes has larger link density than that of the whole network. This means that the breast cancer&#x02013;related genes tend to cluster together in the network, forming a disease module related to breast cancer. This is also the case for other diseases. We also analyzed the structures of the KEGG pathways significantly related to breast cancer and visually displayed the distribution of breast cancer&#x02013;related genes in KEGG pathways, which may help to understand how breast cancer&#x02013;related genes affect related biological processes and functions in breast cancer.</p>
<p>Further, we propose the improved algorithm RCRWR to predict genes related to breast cancer as well as other diseases in the PPI network with and without the KEGG pathway. The results show that RCRWR can effectively improve the ability of predicting genes related to breast cancer and other diseases in the PPI network, and the KEGG pathway is very useful in enhancing disease-gene prediction. We used known breast cancer&#x02013;related genes as a training set to predict candidate genes. For the top 10 candidate genes, we conducted enrichment analysis of the GO and KEGG pathways as well as literature validation and confirmed the connections between these candidate genes and breast cancer. This means that the list of candidate genes is closely related to breast cancer. We believe that these results may provide useful insights into the study of breast cancer&#x02013;related genes and the understanding of its molecular mechanism.</p></sec>
<sec sec-type="data-availability-statement" id="s5">
<title>Data Availability Statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">Supplementary Material</xref>, further inquiries can be directed to the corresponding author/s.</p></sec>
<sec id="s6">
<title>Author Contributions</title>
<p>JY, B-SH, and JL conceived, designed, and managed the study. YZ and JX performed the experiments and drafted the manuscript. LT, JL, QL, and GT reviewed the manuscript. All authors approved the final manuscript.</p></sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>JY, GT, and QL were employed by the company Geneis Beijing Co., Ltd. The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec sec-type="disclaimer" id="s7">
<title>Publisher&#x00027;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
</body>
<back>
<sec sec-type="supplementary-material" id="s8">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2021.596794/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2021.596794/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Datas_Sheet_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_1.XLSX" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_2.XLSX" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Aitman</surname> <given-names>A. M.</given-names></name></person-group> (<year>2002</year>). <article-title>Finding genes that underlie complex traits</article-title>. <source>Science</source> <volume>298</volume>, <fpage>2345</fpage>&#x02013;<lpage>2349</lpage>. <pub-id pub-id-type="doi">10.1126/science.1076641</pub-id><pub-id pub-id-type="pmid">12493905</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bau</surname> <given-names>D. T.</given-names></name> <name><surname>Tsai</surname> <given-names>C. W.</given-names></name> <name><surname>Wu</surname> <given-names>C. N.</given-names></name></person-group> (<year>2011</year>). <article-title>Role of the XRCC5/XRCC6 dimer in carcinogenesis and pharmacogenomics</article-title>. <source>Pharmacogenomics</source> <volume>12</volume>, <fpage>515</fpage>&#x02013;<lpage>534</lpage>. <pub-id pub-id-type="doi">10.2217/pgs.10.209</pub-id><pub-id pub-id-type="pmid">21521024</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bhattacharya</surname> <given-names>S.</given-names></name> <name><surname>Srinivasan</surname> <given-names>K.</given-names></name> <name><surname>Abdisalaam</surname> <given-names>S.</given-names></name> <name><surname>Su</surname> <given-names>F.</given-names></name> <name><surname>Raj</surname> <given-names>P.</given-names></name> <name><surname>Dozmorov</surname> <given-names>I.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>RAD51 interconnects between DNA replication, DNA repair and immunity</article-title>. <source>Nucleic Acids Res.</source> <volume>45</volume>, <fpage>4590</fpage>&#x02013;<lpage>4605</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx126</pub-id><pub-id pub-id-type="pmid">28334891</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bi</surname> <given-names>K.</given-names></name> <name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>He</surname> <given-names>Z.</given-names></name> <name><surname>Gao</surname> <given-names>Z.</given-names></name> <name><surname>Zhao</surname> <given-names>Y.</given-names></name> <name><surname>Fu</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Proto-oncogenes in a eukaryotic unicellular organism play essential roles in plasmodial growth in host cells</article-title>. <source>BMC Genomics</source> <volume>19</volume>:<fpage>881</fpage>. <pub-id pub-id-type="doi">10.1186/s12864-018-5307-4</pub-id><pub-id pub-id-type="pmid">31068144</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Broberg</surname> <given-names>K.</given-names></name> <name><surname>Huynh</surname> <given-names>E.</given-names></name> <name><surname>Schlawicke Engstrom</surname> <given-names>K.</given-names></name> <name><surname>Bjork</surname> <given-names>J.</given-names></name> <name><surname>Albin</surname> <given-names>M.</given-names></name> <name><surname>Ingvar</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>Association between polymorphisms in RMI1, TOP3A, and BLM and risk of cancer, a case-control study</article-title>. <source>BMC Cancer</source> <volume>9</volume>:<fpage>140</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2407-9-140</pub-id><pub-id pub-id-type="pmid">19432957</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>B.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Wu</surname> <given-names>F.-X.</given-names></name></person-group> (<year>2014</year>). <article-title>Identifying disease genes by integrating multiple data sources</article-title>. <source>BMC Med. Genomics</source> <volume>7</volume>:<fpage>S2</fpage>. <pub-id pub-id-type="doi">10.1186/1755-8794-7-S2-S2</pub-id><pub-id pub-id-type="pmid">25350511</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>Z.-Z.</given-names></name> <name><surname>Tang</surname> <given-names>L.</given-names></name> <name><surname>Tang</surname> <given-names>Y.-N.</given-names></name> <name><surname>Gao</surname> <given-names>Y.-Y.</given-names></name> <name><surname>Li</surname> <given-names>H.-J.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Global vs local modularity for network community detection</article-title>. <source>PLoS ONE</source> <volume>13</volume>:<fpage>e0205284</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0205284</pub-id><pub-id pub-id-type="pmid">30372429</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Choobdar</surname> <given-names>S.</given-names></name> <name><surname>Ahsen</surname> <given-names>M. E.</given-names></name> <name><surname>Crawford</surname> <given-names>J.</given-names></name> <name><surname>Tomasoni</surname> <given-names>M.</given-names></name> <name><surname>Fang</surname> <given-names>T.</given-names></name> <name><surname>Lamparter</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Assessment of network module identification across complex diseases</article-title>. <source>Nat. Methods</source> <volume>16</volume>, <fpage>843</fpage>&#x02013;<lpage>852</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0509-5</pub-id><pub-id pub-id-type="pmid">31471613</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Colli</surname> <given-names>L. M.</given-names></name> <name><surname>Machiela</surname> <given-names>M. J.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Myers</surname> <given-names>T. A.</given-names></name> <name><surname>Jessop</surname> <given-names>L.</given-names></name> <name><surname>Delattre</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Landscape of combination immunotherapy and targeted therapy to improve cancer management</article-title>. <source>Cancer Res.</source> <volume>77</volume>, <fpage>3666</fpage>&#x02013;<lpage>3671</lpage>. <pub-id pub-id-type="doi">10.1158/0008-5472.CAN-16-3338</pub-id><pub-id pub-id-type="pmid">28446466</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cowen</surname> <given-names>L.</given-names></name> <name><surname>Ideker</surname> <given-names>T.</given-names></name> <name><surname>Raphael</surname> <given-names>B. J.</given-names></name> <name><surname>Sharan</surname> <given-names>R.</given-names></name></person-group> (<year>2017</year>). <article-title>Network propagation: a universal amplifier of genetic associations</article-title>. <source>Nat. Rev. Genet.</source> <volume>18</volume>:<fpage>551</fpage>. <pub-id pub-id-type="doi">10.1038/nrg.2017.38</pub-id><pub-id pub-id-type="pmid">28607512</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Danielle</surname> <given-names>W.</given-names></name> <name><surname>Jacqueline</surname> <given-names>M.</given-names></name> <name><surname>Joannella</surname> <given-names>M.</given-names></name> <name><surname>Tony</surname> <given-names>B.</given-names></name> <name><surname>Peggy</surname> <given-names>H.</given-names></name> <name><surname>Heather</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>The NHGRI GWAS Catalog, a curated resource of SNP-trait associations</article-title>. <source>Nucleic Acids Res</source>. <volume>42</volume>, <fpage>D1001</fpage>&#x02013;<lpage>D1006</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkt1229</pub-id><pub-id pub-id-type="pmid">24316577</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Di Benedetto</surname> <given-names>A.</given-names></name> <name><surname>Ercolani</surname> <given-names>C.</given-names></name> <name><surname>Mottolese</surname> <given-names>M.</given-names></name> <name><surname>Sperati</surname> <given-names>F.</given-names></name> <name><surname>Pizzuti</surname> <given-names>L.</given-names></name> <name><surname>Vici</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Analysis of the ATR-Chk1 and ATM-Chk2 pathways in male breast cancer revealed the prognostic significance of ATR expression</article-title>. <source>Sci Rep.</source> <volume>7</volume>:<fpage>8078</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-017-07366-7</pub-id><pub-id pub-id-type="pmid">28808232</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ding</surname> <given-names>S. L.</given-names></name> <name><surname>Yu</surname> <given-names>J. C.</given-names></name> <name><surname>Chen</surname> <given-names>S. T.</given-names></name> <name><surname>Hsu</surname> <given-names>G. C.</given-names></name> <name><surname>Kuo</surname> <given-names>S. J.</given-names></name> <name><surname>Lin</surname> <given-names>Y. H.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>Genetic variants of BLM interact with RAD51 to increase breast cancer susceptibility</article-title>. <source>Carcinogenesis</source> <volume>30</volume>, <fpage>43</fpage>&#x02013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1093/carcin/bgn233</pub-id><pub-id pub-id-type="pmid">18974064</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dwivedi</surname> <given-names>S. K.</given-names></name> <name><surname>Tj&#x000E4;rnberg</surname> <given-names>A.</given-names></name> <name><surname>Tegn&#x000E9;r</surname> <given-names>J.</given-names></name> <name><surname>Gustafsson</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>Deriving disease modules from the compressed transcriptional space embedded in a deep autoencoder</article-title>. <source>Nat. Commun.</source> <volume>11</volume>:<fpage>856</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-14666-6</pub-id><pub-id pub-id-type="pmid">32051402</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gao</surname> <given-names>L. B.</given-names></name> <name><surname>Pan</surname> <given-names>X. M.</given-names></name> <name><surname>Li</surname> <given-names>L. J.</given-names></name> <name><surname>Liang</surname> <given-names>W. B.</given-names></name> <name><surname>Zhu</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>L. S.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>RAD51 135G/C polymorphism and breast cancer risk: a meta-analysis from 21 studies</article-title>. <source>Breast Cancer Res Treat.</source> <volume>125</volume>, <fpage>827</fpage>&#x02013;<lpage>835</lpage>. <pub-id pub-id-type="doi">10.1007/s10549-010-0995-8</pub-id><pub-id pub-id-type="pmid">20640595</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ghiassian</surname> <given-names>S. D.</given-names></name> <name><surname>Menche</surname> <given-names>J.</given-names></name> <name><surname>Barab&#x000E1;si</surname> <given-names>A.-L.</given-names></name></person-group> (<year>2015</year>). <article-title>A DIseAse MOdule Detection (DIAMOnD) algorithm derived from a systematic analysis of connectivity patterns of disease proteins in the human interactome</article-title>. <source>PLoS Comput. Biol.</source> <volume>11</volume>:<fpage>e1004120</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1004120</pub-id><pub-id pub-id-type="pmid">25853560</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gill</surname> <given-names>N.</given-names></name> <name><surname>Singh</surname> <given-names>S.</given-names></name> <name><surname>Aseri</surname> <given-names>T. C.</given-names></name></person-group> (<year>2014</year>). <article-title>Computational disease gene prioritization: an appraisal</article-title>. <source>J. Comput. Biol.</source> <volume>21</volume>, <fpage>456</fpage>&#x02013;<lpage>465</lpage>. <pub-id pub-id-type="doi">10.1089/cmb.2013.0158</pub-id><pub-id pub-id-type="pmid">24665902</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goh</surname> <given-names>K. I.</given-names></name> <name><surname>Cusick</surname> <given-names>M. E.</given-names></name> <name><surname>Valle</surname> <given-names>D.</given-names></name> <name><surname>Childs</surname> <given-names>B.</given-names></name> <name><surname>Vidal</surname> <given-names>M.</given-names></name> <name><surname>Barab&#x000E1;si</surname> <given-names>A.-L.</given-names></name></person-group> (<year>2007</year>). <article-title>The human disease network</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A</source>. <volume>104</volume>, <fpage>8685</fpage>&#x02013;<lpage>8690</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0701361104</pub-id><pub-id pub-id-type="pmid">17502601</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guan</surname> <given-names>L. Y.</given-names></name> <name><surname>Lu</surname> <given-names>Y.</given-names></name></person-group> (<year>2018</year>). <article-title>New developments in molecular targeted therapy of ovarian cancer</article-title>. <source>Discov. Med.</source> <volume>26</volume>, <fpage>219</fpage>&#x02013;<lpage>229</lpage>. <pub-id pub-id-type="doi">10.21820/23987073.2018.12.26</pub-id><pub-id pub-id-type="pmid">30695681</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hamosh</surname> <given-names>A.</given-names></name></person-group> (<year>2004</year>). <article-title>Online Mendelian Inheritance in Man (OMIM), a knowledgebase of human genes and genetic disorders</article-title>. <source>Nucleic Acids Res</source>. <volume>33</volume>, <fpage>D514</fpage>&#x02013;<lpage>D517</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gki033</pub-id><pub-id pub-id-type="pmid">15608251</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>W.</given-names></name> <name><surname>Luo</surname> <given-names>S.</given-names></name> <name><surname>Huang</surname> <given-names>T.</given-names></name> <name><surname>Ren</surname> <given-names>J.</given-names></name> <name><surname>Wu</surname> <given-names>X.</given-names></name> <name><surname>Shao</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>The Ku70 &#x02212;1310C/G promoter polymorphism is associated with breast cancer susceptibility in Chinese Han population</article-title>. <source>Mol. Biol. Rep</source>. <volume>39</volume>, <fpage>577</fpage>&#x02013;<lpage>583</lpage>. <pub-id pub-id-type="doi">10.1007/s11033-011-0773-7</pub-id><pub-id pub-id-type="pmid">21556760</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>K.</given-names></name> <name><surname>Hu</surname> <given-names>J.-B.</given-names></name> <name><surname>Tang</surname> <given-names>L.</given-names></name> <name><surname>Xiang</surname> <given-names>J.</given-names></name> <name><surname>Ma</surname> <given-names>J.-L.</given-names></name> <name><surname>Gao</surname> <given-names>Y.-Y.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Predicting disease-related genes by path structure and community structure in protein&#x02013;protein networks</article-title>. <source>J. Stat. Mech. Theory Exp.</source> <volume>2018</volume>:<fpage>100001</fpage>. <pub-id pub-id-type="doi">10.1088/1742-5468/aae02b</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>K.</given-names></name> <name><surname>Xiang</surname> <given-names>J.</given-names></name> <name><surname>Yu</surname> <given-names>Y.-X.</given-names></name> <name><surname>Tang</surname> <given-names>L.</given-names></name> <name><surname>Xiang</surname> <given-names>Q.</given-names></name> <name><surname>Li</surname> <given-names>J.-M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Significance-based multi-scale method for network community detection and its application in disease-gene prediction</article-title>. <source>PLoS ONE</source> <volume>15</volume>:<fpage>e0227244</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0227244</pub-id><pub-id pub-id-type="pmid">32196490</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>F.</given-names></name> <name><surname>Goyal</surname> <given-names>N.</given-names></name> <name><surname>Sullivan</surname> <given-names>K.</given-names></name> <name><surname>Hanamshet</surname> <given-names>K.</given-names></name> <name><surname>Patel</surname> <given-names>M.</given-names></name> <name><surname>Mazina</surname> <given-names>O. M.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Targeting BRCA1- and BRCA2-deficient cells with RAD52 small molecule inhibitors</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume>, <fpage>4189</fpage>&#x02013;<lpage>4199</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkw087</pub-id><pub-id pub-id-type="pmid">26873923</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kanehisa</surname> <given-names>M.</given-names></name> <name><surname>Goto</surname> <given-names>S.</given-names></name></person-group> (<year>2000</year>). <article-title>KEGG: Kyoto encyclopedia of genes and genomes</article-title>. <source>Nucleic Acids Res.</source> <volume>28</volume>, <fpage>27</fpage>&#x02013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1093/nar/28.1.27</pub-id><pub-id pub-id-type="pmid">10592173</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Keshava Prasad</surname> <given-names>T. S.</given-names></name> <name><surname>Goel</surname> <given-names>R.</given-names></name> <name><surname>Kandasamy</surname> <given-names>K.</given-names></name> <name><surname>Keerthikumar</surname> <given-names>S.</given-names></name> <name><surname>Kumar</surname> <given-names>S.</given-names></name> <name><surname>Mathivanan</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>Human protein reference database&#x02014;2009 update</article-title>. <source>Nucleic Acids Res</source>. <volume>37</volume>, <fpage>D767</fpage>&#x02013;<lpage>D772</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkn892</pub-id><pub-id pub-id-type="pmid">18988627</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lai</surname> <given-names>D.</given-names></name> <name><surname>Lu</surname> <given-names>H.</given-names></name> <name><surname>Nardini</surname> <given-names>C.</given-names></name></person-group> (<year>2010</year>). <article-title>Enhanced modularity-based community detection by random walk network preprocessing</article-title>. <source>Phys. Rev. E</source> <volume>81</volume>:<fpage>066118</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevE.81.066118</pub-id><pub-id pub-id-type="pmid">20866489</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liang</surname> <given-names>F.</given-names></name> <name><surname>Longerich</surname> <given-names>S.</given-names></name> <name><surname>Miller</surname> <given-names>A. S.</given-names></name> <name><surname>Tang</surname> <given-names>C.</given-names></name> <name><surname>Buzovetsky</surname> <given-names>O.</given-names></name> <name><surname>Xiong</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Promotion of RAD51-mediated homologous DNA pairing by the RAD51AP1-UAF1 complex</article-title>. <source>Cell Rep.</source> <volume>15</volume>, <fpage>2118</fpage>&#x02013;<lpage>2126</lpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2016.05.007</pub-id><pub-id pub-id-type="pmid">27239033</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liberzon</surname> <given-names>A.</given-names></name> <name><surname>Subramanian</surname> <given-names>A.</given-names></name> <name><surname>Pinchback</surname> <given-names>R.</given-names></name> <name><surname>Thorvaldsdottir</surname> <given-names>H.</given-names></name> <name><surname>Tamayo</surname> <given-names>P.</given-names></name> <name><surname>Mesirov</surname> <given-names>J. P.</given-names></name></person-group> (<year>2011</year>). <article-title>Molecular signatures database (MSigDB) 3.0</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>1739</fpage>&#x02013;<lpage>1740</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr260</pub-id><pub-id pub-id-type="pmid">21546393</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>P.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Tian</surname> <given-names>L.-P.</given-names></name> <name><surname>Wu</surname> <given-names>F.-X.</given-names></name></person-group> (<year>2019a</year>). <article-title>Enhancing the prediction of disease&#x02013;gene associations with multimodal deep learning</article-title>. <source>Bioinformatics</source> <volume>35</volume>, <fpage>3735</fpage>&#x02013;<lpage>3742</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz155</pub-id><pub-id pub-id-type="pmid">30825303</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>P.</given-names></name> <name><surname>Xiao</surname> <given-names>Q.</given-names></name> <name><surname>Wei</surname> <given-names>P.-J.</given-names></name> <name><surname>Liao</surname> <given-names>B.</given-names></name> <name><surname>Wu</surname> <given-names>F.-X.</given-names></name></person-group> (<year>2019b</year>). <article-title>Identifying disease-gene associations with graph-regularized manifold learning</article-title>. <source>Front. Genet.</source> <volume>10</volume>:<fpage>270</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2019.00270</pub-id><pub-id pub-id-type="pmid">31001321</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oliver</surname> <given-names>S.</given-names></name></person-group> (<year>2000</year>). <article-title>Guilt-by-association goes global</article-title>. <source>Nature</source> <volume>403</volume>, <fpage>601</fpage>&#x02013;<lpage>603</lpage>. <pub-id pub-id-type="doi">10.1038/35001165</pub-id><pub-id pub-id-type="pmid">10688178</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Opap</surname> <given-names>K.</given-names></name> <name><surname>Mulder</surname> <given-names>N.</given-names></name></person-group> (<year>2017</year>). <article-title>Recent advances in predicting gene&#x02013;disease associations</article-title>. <source>F1000Research</source> <volume>6</volume>:<fpage>578</fpage>. <pub-id pub-id-type="doi">10.12688/f1000research.10788.1</pub-id><pub-id pub-id-type="pmid">28529714</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Podralska</surname> <given-names>M.</given-names></name> <name><surname>Ziolkowska-Suchanek</surname> <given-names>I.</given-names></name> <name><surname>Zurawek</surname> <given-names>M.</given-names></name> <name><surname>Dzikiewicz-Krawczyk</surname> <given-names>A.</given-names></name> <name><surname>Slomski</surname> <given-names>R.</given-names></name> <name><surname>Nowak</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Genetic variants in ATM, H2AFX and MRE11 genes and susceptibility to breast cancer in the Polish population</article-title>. <source>BMC Cancer</source> <volume>18</volume>:<fpage>452</fpage>. <pub-id pub-id-type="doi">10.1186/s12885-018-4360-3</pub-id><pub-id pub-id-type="pmid">29678143</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Taherian-Fard</surname> <given-names>A.</given-names></name> <name><surname>Srihari</surname> <given-names>S.</given-names></name> <name><surname>Ragan</surname> <given-names>M. A.</given-names></name></person-group> (<year>2015</year>). <article-title>Breast cancer classification: linking molecular mechanisms to disease prognosis</article-title>. <source>Brief. Bioinform.</source> <volume>16</volume>, <fpage>461</fpage>&#x02013;<lpage>474</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbu020</pub-id><pub-id pub-id-type="pmid">24950687</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ullah Shah</surname> <given-names>A.</given-names></name> <name><surname>Mahjabeen</surname> <given-names>I.</given-names></name> <name><surname>Kayani</surname> <given-names>M. A.</given-names></name></person-group> (<year>2015</year>). <article-title>Genetic polymorphisms in cell cycle regulatory genes CCND1 and CDK4 are associated with susceptibility to breast cancer</article-title>. <source>J BUON.</source> <volume>20</volume>, <fpage>985</fpage>&#x02013;<lpage>993</lpage>.<pub-id pub-id-type="pmid">26416047</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Valencia</surname> <given-names>O. M.</given-names></name> <name><surname>Samuel</surname> <given-names>S. E.</given-names></name> <name><surname>Viscusi</surname> <given-names>R. K.</given-names></name> <name><surname>Riall</surname> <given-names>T. S.</given-names></name> <name><surname>Neumayer</surname> <given-names>L. A.</given-names></name> <name><surname>Aziz</surname> <given-names>H.</given-names></name></person-group> (<year>2017</year>). <article-title>The role of genetic testing in patients with breast cancer: a review</article-title>. <source>JAMA Surg.</source> <volume>152</volume>, <fpage>589</fpage>&#x02013;<lpage>594</lpage>. <pub-id pub-id-type="doi">10.1001/jamasurg.2017.0552</pub-id><pub-id pub-id-type="pmid">28423155</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>von Mering</surname> <given-names>C.</given-names></name> <name><surname>Huynen</surname> <given-names>M.</given-names></name> <name><surname>Jaeggi</surname> <given-names>D.</given-names></name> <name><surname>Schmidt</surname> <given-names>S.</given-names></name> <name><surname>Bork</surname> <given-names>P.</given-names></name> <name><surname>Snel</surname> <given-names>B.</given-names></name></person-group> (<year>2003</year>). <article-title>STRING: a database of predicted functional associations between proteins</article-title>. <source>Nucleic Acids Res.</source> <volume>31</volume>, <fpage>258</fpage>&#x02013;<lpage>261</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkg034</pub-id><pub-id pub-id-type="pmid">12519996</pub-id></citation></ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>H. C.</given-names></name> <name><surname>Chiu</surname> <given-names>C. F.</given-names></name> <name><surname>Tsai</surname> <given-names>R. Y.</given-names></name> <name><surname>Kuo</surname> <given-names>Y. S.</given-names></name> <name><surname>Chen</surname> <given-names>H. S.</given-names></name> <name><surname>Wang</surname> <given-names>R. F.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>Association of genetic polymorphisms of EXO1 gene with risk of breast cancer in Taiwan</article-title>. <source>Anticancer Res.</source> <volume>29</volume>, <fpage>3897</fpage>&#x02013;<lpage>3901</lpage>.<pub-id pub-id-type="pmid">19846925</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Willems</surname> <given-names>P.</given-names></name> <name><surname>De Ruyck</surname> <given-names>K.</given-names></name> <name><surname>Van den Broecke</surname> <given-names>R.</given-names></name> <name><surname>Makar</surname> <given-names>A.</given-names></name> <name><surname>Perletti</surname> <given-names>G.</given-names></name> <name><surname>Thierens</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>A polymorphism in the promoter region of Ku70/XRCC6, associated with breast cancer risk and oestrogen exposure</article-title>. <source>J. Cancer Res. Clin. Oncol.</source> <volume>135</volume>, <fpage>1159</fpage>&#x02013;<lpage>1168</lpage>. <pub-id pub-id-type="doi">10.1007/s00432-009-0556-x</pub-id><pub-id pub-id-type="pmid">19219618</pub-id></citation></ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wong</surname> <given-names>M. W.</given-names></name> <name><surname>Nordfors</surname> <given-names>C.</given-names></name> <name><surname>Mossman</surname> <given-names>D.</given-names></name> <name><surname>Pecenpetelovska</surname> <given-names>G.</given-names></name> <name><surname>Avery-Kiejda</surname> <given-names>K. A.</given-names></name> <name><surname>Talseth-Palmer</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>BRIP1, PALB2, and RAD51C mutation analysis reveals their relative importance as genetic susceptibility factors for breast cancer</article-title>. <source>Breast Cancer Res. Treat.</source> <volume>127</volume>, <fpage>853</fpage>&#x02013;<lpage>859</lpage>. <pub-id pub-id-type="doi">10.1007/s10549-011-1443-0</pub-id><pub-id pub-id-type="pmid">21409391</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>X.</given-names></name> <name><surname>Jiang</surname> <given-names>R.</given-names></name> <name><surname>Zhang</surname> <given-names>M. Q.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name></person-group> (<year>2008</year>). <article-title>Network-based global inference of human disease genes</article-title>. <source>Mol. Syst. Biol.</source> <volume>4</volume>:<fpage>189</fpage>. <pub-id pub-id-type="doi">10.1038/msb.2008.27</pub-id><pub-id pub-id-type="pmid">18463613</pub-id></citation></ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>P.</given-names></name> <name><surname>Song</surname> <given-names>C.</given-names></name> <name><surname>Wang</surname> <given-names>K.</given-names></name> <name><surname>Yan</surname> <given-names>R.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Evaluation of miRNA-binding-site SNPs of MRE11A, NBS1, RAD51 and RAD52 involved in HRR pathway genes and risk of breast cancer in China</article-title>. <source>Mol. Genet. Genomics</source> <volume>290</volume>, <fpage>1141</fpage>&#x02013;<lpage>1153</lpage>. <pub-id pub-id-type="doi">10.1007/s00438-014-0983-5</pub-id><pub-id pub-id-type="pmid">25566853</pub-id></citation></ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xiang</surname> <given-names>J.</given-names></name> <name><surname>Hu</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Bao</surname> <given-names>M.-H.</given-names></name> <name><surname>Tang</surname> <given-names>L.</given-names></name> <name><surname>Tang</surname> <given-names>Y.-N.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Enhancing community detection by using local structural information</article-title>. <source>J. Stat. Mech. Theory Exp.</source> <volume>2016</volume>:<fpage>033405</fpage>. <pub-id pub-id-type="doi">10.1088/1742-5468/2016/03/033405</pub-id></citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Huang</surname> <given-names>T.</given-names></name> <name><surname>Song</surname> <given-names>W. M.</given-names></name> <name><surname>Petralia</surname> <given-names>F.</given-names></name> <name><surname>Mobbs</surname> <given-names>C. V.</given-names></name> <name><surname>Zhang</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Discover the network mechanisms underlying the connections between aging and age-related diseases</article-title>. <source>Sci. Rep</source>. <volume>6</volume>:<fpage>32566</fpage>. <pub-id pub-id-type="doi">10.1038/srep32566</pub-id><pub-id pub-id-type="pmid">27582315</pub-id></citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>G.</given-names></name> <name><surname>Wang</surname> <given-names>L. G.</given-names></name> <name><surname>Han</surname> <given-names>Y.</given-names></name> <name><surname>He</surname> <given-names>Q. Y.</given-names></name></person-group> (<year>2012</year>). <article-title>clusterProfiler: an R package for comparing biological themes among gene clusters</article-title>. <source>OMICS</source> <volume>16</volume>, <fpage>284</fpage>&#x02013;<lpage>287</lpage>. <pub-id pub-id-type="doi">10.1089/omi.2011.0118</pub-id><pub-id pub-id-type="pmid">22455463</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Han</surname> <given-names>Y.</given-names></name> <name><surname>Ji</surname> <given-names>T.</given-names></name> <name><surname>Huang</surname> <given-names>X.</given-names></name> <name><surname>Gao</surname> <given-names>Q.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>RAD54B potentiates tumor growth and predicts poor prognosis of patients with luminal A breast cancer</article-title>. <source>Biomed. Pharmacother.</source> <volume>118</volume>:<fpage>109341</fpage>. <pub-id pub-id-type="doi">10.1016/j.biopha.2019.109341</pub-id><pub-id pub-id-type="pmid">31545289</pub-id></citation></ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>Z. Q.</given-names></name> <name><surname>Han</surname> <given-names>G. S.</given-names></name> <name><surname>Yu</surname> <given-names>Z. G.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>Laplacian normalization and random walk on heterogeneous networks for disease-gene prioritization</article-title>. <source>Comput. Biol. Chem.</source> <volume>57</volume>, <fpage>21</fpage>&#x02013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1016/j.compbiolchem.2015.02.008</pub-id><pub-id pub-id-type="pmid">25736609</pub-id></citation></ref>
</ref-list>
<fn-group>
<fn fn-type="financial-disclosure"><p><bold>Funding.</bold> This work was supported by the Training Program for Excellent Young Innovators of Changsha (Grant No. kq2009093 and kq2009095), the National Natural Science Foundation of China (Grant No. 61702054, 81873780, U1909208, and 61972423), the Fundamental Research Funds for the Central Universities of Central South University (Grant No. 2019zzts279), Foundation of the Education Department of Hunan Province (Grant No. 18B539 and 19A058), Foundation of Health and Family Planning Commission of Hunan Province (20201918), Hunan Natural Science Foundation Youth Program (2019JJ50697), Application Characteristic Discipline of Hunan Province, the Project of Changsha Science and Technology (Grant No. kq2004077), the Natural Science Foundation of Hunan Province (Grant No. 2018JJ3570), and the Project to Introduce Intelligence from Oversea Experts to Changsha City (Grant No. 2089901).</p>
</fn>
</fn-group>
</back>
</article> 