<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Cell. Neurosci.</journal-id>
<journal-title>Frontiers in Cellular Neuroscience</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Cell. Neurosci.</abbrev-journal-title>
<issn pub-type="epub">1662-5102</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fncel.2024.1369242</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Cellular Neuroscience</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>PathFinder: a novel graph transformer model to infer multi-cell intra- and inter-cellular signaling pathways and communications</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Feng</surname> <given-names>Jiarui</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn0001"><sup>&#x2020;</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Song</surname> <given-names>Haoran</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn0001"><sup>&#x2020;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Province</surname> <given-names>Michael</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/83069/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Guangfu</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/862410/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Payne</surname> <given-names>Philip R. O.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1450028/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname> <given-names>Yixin</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Li</surname> <given-names>Fuhai</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/59620/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Institute for Informatics (I2), Washington University School of Medicine, Washington University in St. Louis</institution>, <addr-line>St. Louis, MO</addr-line>, <country>United States</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Computer Science and Engineering, Washington University in St. Louis</institution>, <addr-line>St. Louis, MO</addr-line>, <country>United States</country></aff>
<aff id="aff3"><sup>3</sup><institution>Division of Statistical Genomics, Department of Genetics, Washington University in St. Louis</institution>, <addr-line>St. Louis, MO</addr-line>, <country>United States</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Surgery, University of Missouri-Columbia</institution>, <addr-line>Columbia, MO</addr-line>, <country>United States</country></aff>
<aff id="aff5"><sup>5</sup><institution>Department of Molecular Microbiology and Immunology, University of Missouri-Columbia</institution>, <addr-line>Columbia, MO</addr-line>, <country>United States</country></aff>
<aff id="aff6"><sup>6</sup><institution>NextGen Precision Health Institute, University of Missouri-Columbia</institution>, <addr-line>Columbia, MO</addr-line>, <country>United States</country></aff>
<aff id="aff7"><sup>7</sup><institution>Department of Pediatrics, Washington University School of Medicine, Washington University in St. Louis</institution>, <addr-line>St. Louis, MO</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0002">
<p>Edited by: Ning Huang, Chongqing Medical University, China</p>
</fn>
<fn fn-type="edited-by" id="fn0003">
<p>Reviewed by: Jiahe Tan, First Affiliated Hospital of Chongqing Medical University, China</p>
<p>Austin W. T. Chiang, Augusta University, United States</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Fuhai Li, <email>fuhai.li@wustl.edu</email></corresp>
<fn fn-type="equal" id="fn0001">
<p><sup>&#x2020;</sup>These authors share first authorship</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>05</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>18</volume>
<elocation-id>1369242</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>04</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2024 Feng, Song, Province, Li, Payne, Chen and Li.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Feng, Song, Province, Li, Payne, Chen and Li</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Recently, large-scale scRNA-seq datasets have been generated to understand the complex signaling mechanisms within the microenvironment of Alzheimer&#x2019;s Disease (AD), which are critical for identifying novel therapeutic targets and precision medicine. However, the background signaling networks are highly complex and interactive. It remains challenging to infer the core intra- and inter-multi-cell signaling communication networks using scRNA-seq data. In this study, we introduced a novel graph transformer model, PathFinder, to infer multi-cell intra- and inter-cellular signaling pathways and communications among multi-cell types. Compared with existing models, the novel and unique design of PathFinder is based on the divide-and-conquer strategy. This model divides complex signaling networks into signaling paths, which are then scored and ranked using a novel graph transformer architecture to infer intra- and inter-cell signaling communications. We evaluated the performance of PathFinder using two scRNA-seq data cohorts. The first cohort is an APOE4 genotype-specific AD, and the second is a human cirrhosis cohort. The evaluation confirms the promising potential of using PathFinder as a general signaling network inference model.</p>
</abstract>
<kwd-group>
<kwd>Alzheimer&#x2019;s disease</kwd>
<kwd>signaling pathways</kwd>
<kwd>cell cell signaling communications</kwd>
<kwd>microenvironment</kwd>
<kwd>graph neural network</kwd>
</kwd-group>
<contract-sponsor id="cn1">NIA R56AG065352</contract-sponsor>
<contract-sponsor id="cn2">1R21AG078799-01A1</contract-sponsor>
<contract-sponsor id="cn3">1RM1NS132962-01</contract-sponsor>
<counts>
<fig-count count="6"/>
<table-count count="1"/>
<equation-count count="22"/>
<ref-count count="53"/>
<page-count count="16"/>
<word-count count="11161"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Non-Neuronal Cells</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<title>Introduction</title>
<p>Single-cell RNA sequencing data (scRNA-seq) technologies have become popular in recent years because of their ability to profile gene expression and analyze cell composition in the single cell resolution (<xref ref-type="bibr" rid="ref24">Kolodziejczyk et al., 2015</xref>; <xref ref-type="bibr" rid="ref33">Tanay and Regev, 2017</xref>; <xref ref-type="bibr" rid="ref23">Hwang et al., 2018</xref>). On the one hand, by profiling and annotating scRNA-seq data, researchers can analyze differentially expressed genes in each cell population and sub-population to understand which gene is altered in certain conditions. On the other hand, scRNA-seq data also show great potential in discovering intra- and inter-cellular communication. However, there are only limited methods for discovering active signaling pathways or intra-cellular communication using scRNA-seq data. The existing models are mainly based on correlation, regression, and Bayesian analysis (<xref ref-type="bibr" rid="ref32">Saint-Antoine and Singh, 2019</xref>), and the direct interaction signaling cascades were usually ignored in those methods because only a small set of genes exhibit gene expression changes between different conditions (<xref ref-type="bibr" rid="ref13">Feng et al., 2020</xref>). For example, CellPhoneDB (<xref ref-type="bibr" rid="ref11">Efremova et al., 2020</xref>) can model the interactions between ligands from one cell type and receptors from another cell type. However, it cannot model the downstream signaling. CCCExplorer (<xref ref-type="bibr" rid="ref10">Choi et al., 2015</xref>) can discover both the ligand&#x2013;receptor interaction and the downstream signaling network by modeling differentially expressed genes. NicheNet (<xref ref-type="bibr" rid="ref5">Browaeys et al., 2020</xref>) takes a further step by integrating various interaction databases and training a predictive model to assess the interaction potential between the ligand and downstream targets. 
However, it only applies a statistical model, which cannot generate a clear communication path. CytoTalk (<xref ref-type="bibr" rid="ref22">Hu et al., 2021</xref>) applies the Steiner tree to discover the de-novo signal transduction network from gene co-expression. However, the discovered signaling is based on co-expression, and the physical interaction cascade is still unknown.</p>
<p>In the past few years, graph neural networks (GNNs) have become famous due to their great performance in node and graph representation as well as in classification tasks. For instance, GraphSAGE (<xref ref-type="bibr" rid="ref18">Hamilton et al., 2017</xref>) proposed the first general framework for learning the node representation inductively. GAT (<xref ref-type="bibr" rid="ref38">Veli&#x010D;kovi&#x0107; et al., 2017</xref>) incorporates the attention mechanism into GNNs to actively learn how to aggregate all the information in graphs. The DGCNN (<xref ref-type="bibr" rid="ref48">Zhang et al., 2018</xref>) model proposes sortPooling to efficiently sort nodes and learn graph features for graph classification. GIN (<xref ref-type="bibr" rid="ref43">Xu et al., 2018</xref>) connects message-passing GNNs with the 1-dimensional Weisfeiler-Lehman test (1-WL test) on learning graph structure and proposes a new GNN algorithm that is equally powerful as the 1-WL test. More recently, researchers have tried to generalize the transformer architecture (<xref ref-type="bibr" rid="ref37">Vaswani et al., 2017</xref>) into graph learning fields as it already shows superior power in learning both text and image data. Many studies (<xref ref-type="bibr" rid="ref7">Cai and Lam, 2020</xref>; <xref ref-type="bibr" rid="ref20">Hu et al., 2020</xref>; <xref ref-type="bibr" rid="ref31">Rong et al., 2020</xref>; <xref ref-type="bibr" rid="ref51">Zhang et al., 2020</xref>; <xref ref-type="bibr" rid="ref45">Yang et al., 2021</xref>; <xref ref-type="bibr" rid="ref46">Ying et al., 2021</xref>) have shown great potential in applying the transformer model to the graph data. They either nest GNN architectures in the transformer layer, design specific attention mechanisms, or design novel encoding mechanisms to incorporate the graph structure into the transformer model. 
However, using GNNs to discover the intra- and inter-cell communication network remains unknown as these networks are typically black-box models and it is hard to interpret their prediction results.</p>
<p>In this study, we present a novel framework called PathFinder to discover both intra- and inter-cell communication networks with a novel graph transformer-based neural network. Given the scRNA-seq expression data and the condition (control/test), PathFinder first samples a series of predefined paths through the prior gene&#x2013;gene interaction database. Then, the PathFinder model takes the scRNA-seq expression data and the predefined path list as inputs to predict the condition of each cell. Through the training, the path importance score will be learned to indicate the relative importance of each path in separating between the control and test conditions. To learn different types of communication, such as upregulated or downregulated networks, a novel regularization term is introduced. PathFinder will first generate a prior score for each path based on the expression level of genes in the path. Then, during the training, this regularization term will regularize the learned path scores to be close to the prior scores. After training, the path score will be sorted and the intra-communication network for each cell type will be generated by extracting the top K important paths. To generate the inter-cell communication network between the ligand cell and the receptor cell, the intra-cell communication network for the receptor cell will be collected, and the ligand list will be extracted from the differentially expressed gene list in the ligand cell. Finally, the ligands are linked to the intra-cell network based on the ligand&#x2013;receptor interaction database. The overall procedure of generating both intra- and inter-cell communication networks using PathFinder is shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>. To the best of our knowledge, this is the first method to apply deep learning and graph transformers to discover signaling networks in scRNA-seq data. 
The advantages of PathFinder are listed below: (1) The model is designed based on a graph transformer, which has the great ability to learn both local and long-range signaling patterns from gene expression and large-scale networks. (2) It is capable of identifying and providing the full signaling network between cells via cellular ligands and receptors. (3) The proposed PathFinder is a general framework that allows users to input their own defined signaling paths or gene&#x2013;gene interaction network database to identify important signaling based on their interests. Furthermore, (4) it can separate and generate different types of communication networks (Differentially expressed/upregulated/downregulated), which allows more precise downstream analysis. We applied the PathFinder model on two scRNA-seq data cohorts: one is a mouse cohort of AD and another is a human cohort of cirrhosis. The PathFinder not only achieves great prediction results but also generates intra- and inter-cell communication networks that align well with the latest knowledge on the mechanism of both diseases.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p><bold>(Upper)</bold> Overview of the PathFinder method to discover both intra- and inter-cell communication networks. The input scRNA expression data with both samples from the control condition and the test condition are used to construct the gene&#x2013;gene interaction network based on our large database. Then, the path sampler is used to generate all pre-defined paths from the interaction network. Then, the PathFinder model is trained to separate the cells from two different conditions. After the training, the learned path score can indicate the importance of each path. The top <inline-formula>
<mml:math id="M1">
<mml:mi>k</mml:mi>
</mml:math>
</inline-formula> paths are selected to generate the intra-cell communication network. Finally, the ligand&#x2013;receptor database is used to link all picked ligands (like differential expressed ligands) from ligand cells to the receptors in the intra-cell communication network of receptor cells to construct the inter-cell communication network. <bold>(Lower)</bold> Model architecture of PathFinder. The PathFinder model consists of three components: node encoder, path encoder, and graph encoder. The node encoder is a stack of <inline-formula>
<mml:math id="M2">
<mml:mi>L</mml:mi>
</mml:math>
</inline-formula> transformer layers with special encoding to encode local graph structure information of each node. The path encoder takes the output from each layer of the node encoder to learn long-range path embedding for each pre-defined path. Finally, the graph encoder aggregates information from each path to generate graph embedding and make the final prediction. In the graph encoder, the trainable path weight will be learned to assign each path an importance score, which can be used to generate intra-cell communication networks.</p>
</caption>
<graphic xlink:href="fncel-18-1369242-g001.tif"/>
</fig>
</sec>
<sec sec-type="results" id="sec2">
<title>Results</title>
<sec id="sec3">
<title>scRNA-seq data of Alzheimer&#x2019;s disease cohort on mice</title>
<p>To evaluate the proposed PathFinder method, scRNA-seq data on Alzheimer&#x2019;s disease are collected from the Gene Expression Omnibus (GEO) database with accession number GSE164507 (<xref ref-type="bibr" rid="ref40">Wang et al., 2021</xref>). The raw data are processed using the Seurat R package (<xref ref-type="bibr" rid="ref19">Hao et al., 2021</xref>), and the process procedure is conducted by following the previous study&#x2019;s procedure (<xref ref-type="bibr" rid="ref40">Wang et al., 2021</xref>). Specifically, we select cell samples from two different conditions, denoted as TAFE4_tam and TAFE4_oil. TAFE4_tam refers to mice with the APOE4 gene knocked out from astrocyte cells, and TAFE4_oil refers to mice with the existence of APOE4. It is well known that APOE4 is one of the most significant genetic risk factors for late-onset AD. By analyzing the difference between the signaling pattern with and without APOE4, we can gain a deeper understanding of the effects of the APOE4 gene on brain cells.</p>
<p>Concretely, the excitatory neuron (Ex), microglia (Mic), and astrocyte (Ast) of the TAFE4 group are collected from the dataset with a total number of samples of 13,604, 3,874, and 734, respectively. The detailed data distribution is provided in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table S1</xref>. Then, the PathFinder method is applied to predict the condition of each cell (oil or tam) separately for each cell type and generate both intra- and inter-cell communication networks between these three cell types. The pre-defined path list includes all shortest distance paths starting from receptors and all possible paths from the receptor to the target gene. For the shortest distance paths, we only select paths with a minimum length of 3 (except all receptor direct regularizations, which have a length of 2) and a maximum length of 10. We compute the prior score of each path based on the average differential expression level of all genes in the path (more details in the Method section) for the path score regularization. To ensure the robustness of the analysis, we only selected the top 8,192 variable genes from the original dataset as input to the model, which resulted in a final count of 1,210 pre-selected paths. The detailed path selection procedure can be found in the Method section.</p>
</sec>
<sec id="sec4">
<title>scRNA-seq data of cirrhosis cohort on humans</title>
<p>The scRNA-seq data of human cirrhosis is obtained from the GEO database under the accession number GSE136103, which includes non-parenchymal cells collected from healthy individuals and patients with cirrhosis. After processing, single-cell data were obtained from five healthy individuals (healthy1-5) and five patients with cirrhosis (cirrhotic1-5). Similarly, the raw data are processed using the Seurat R package (<xref ref-type="bibr" rid="ref19">Hao et al., 2021</xref>). After the process, we select three important cell types: endothelial (Endo), macrophages (Mac), and T cells (Tcell). The total number of cells for each cell type is 6,197, 9,173, and 20,950, respectively. The detailed data distribution is provided in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table S1</xref>. Similar to the AD cohort, we use PathFinder to predict the cell condition for each cell type. The pre-defined path list is selected in the same way as the AD dataset. For the cirrhosis cohort, we selected the top 12,000 variable genes from the original dataset as input to the model, which resulted in a final count of 1,549 pre-selected paths.</p>
</sec>
<sec id="sec5">
<title>PathFinder can effectively separate cells from different conditions of AD by selecting differentially expressed signaling paths</title>
<p>To evaluate the performance of the PathFinder model, it is applied to excitatory neurons, astrocytes, and microglia cells from the AD cohort separately to predict the conditions of each cell (tam/oil), denoted as TAFE4_ex, TAFE4_mic, and TAFE4_ast, respectively. For each cell type, we repeat the training five times, each time randomly splitting the whole dataset into train, validation, and test subsets at a ratio of 0.7/0.1/0.2. We report the average performance and standard deviation on the test set over all five runs. The detailed experimental setting can be found in the Method section. The detailed results are shown in <xref ref-type="table" rid="tab1">Table 1</xref> and <xref ref-type="fig" rid="fig2">Figure 2A</xref>.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Evaluation results of the PathFinder model.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th/>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">Recall</th>
<th align="center" valign="top">Precision</th>
<th align="center" valign="top">Specificity</th>
<th align="center" valign="top">F1</th>
<th align="center" valign="top">AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">TAFE4_ex</td>
<td align="char" valign="top" char="&#x00B1;">0.67 &#x00B1; 0.01</td>
<td align="char" valign="top" char="&#x00B1;">0.71 &#x00B1; 0.04</td>
<td align="char" valign="top" char="&#x00B1;">0.66 &#x00B1; 0.02</td>
<td align="char" valign="top" char="&#x00B1;">0.64 &#x00B1; 0.05</td>
<td align="char" valign="top" char="&#x00B1;">0.68 &#x00B1; 0.01</td>
<td align="char" valign="top" char="&#x00B1;">0.73 &#x00B1; 0.01</td>
</tr>
<tr>
<td align="left" valign="top">TAFE4_mic</td>
<td align="char" valign="top" char="&#x00B1;">0.67 &#x00B1; 0.01</td>
<td align="char" valign="top" char="&#x00B1;">0.76 &#x00B1; 0.03</td>
<td align="char" valign="top" char="&#x00B1;">0.65 &#x00B1; 0.02</td>
<td align="char" valign="top" char="&#x00B1;">0.58 &#x00B1; 0.04</td>
<td align="char" valign="top" char="&#x00B1;">0.70 &#x00B1; 0.01</td>
<td align="char" valign="top" char="&#x00B1;">0.71 &#x00B1; 0.01</td>
</tr>
<tr>
<td align="left" valign="top">TAFE4_ast</td>
<td align="char" valign="top" char="&#x00B1;">0.62 &#x00B1; 0.04</td>
<td align="char" valign="top" char="&#x00B1;">0.75 &#x00B1; 0.15</td>
<td align="char" valign="top" char="&#x00B1;">0.65 &#x00B1; 0.03</td>
<td align="char" valign="top" char="&#x00B1;">0.44 &#x00B1; 0.14</td>
<td align="char" valign="top" char="&#x00B1;">0.69 &#x00B1; 0.06</td>
<td align="char" valign="top" char="&#x00B1;">0.65 &#x00B1; 0.04</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Evaluation of the PathFinder model on the AD cohort. <bold>(A)</bold> The detailed evaluation metrics on test dataset from all runs. <bold>(B)</bold> The comparison of the average differential expression level of top paths sorted by PathFinder during the training. The top 200 paths have higher differential expression level than others for all three cell types. <bold>(C)</bold> The learned path scores of PathFinder on different runs. All paths are ranked by the average score across all runs.</p>
</caption>
<graphic xlink:href="fncel-18-1369242-g002.tif"/>
</fig>
<p>As can be seen, the PathFinder can successfully classify the majority of cells in the test dataset into the correct condition. This means that, after training, the model learned the most important difference between the two conditions from a huge gene expression profile. Such differences can be reflected in the importance score of each path, as the final prediction is made based on the different predefined paths. Among all results, the standard deviation of the metrics for TAFE4_ast is much larger than the other two cell types. We speculate that this discrepancy is caused by the limited number of cell samples in the TAFE4_ast group, which makes the model easily overfit to the training data.</p>
<p>Then, we evaluate the learned path score from each group. For each cell group, we first average the learned path score from five repeated runs to get the final path score. We average the absolute fold-change level of all genes within each path to get an average differential expression level for each path. Then, we compare the top 200 selected paths from the results of the PathFinder model to the remaining paths. The results are shown in <xref ref-type="fig" rid="fig2">Figure 2B</xref>. We can see that, for all three different cell types, the selected top 200 paths from PathFinder have a much higher average differential expression level compared to the remaining paths. The results indicate that PathFinder is effective in ranking differentially expressed paths through the training. This can be attributed to two objective functions used in PathFinder. First, by minimizing the classification loss, the model is forced to increase the score for paths that are useful for separating two different conditions. It is intuitive that paths with higher average differential expression levels are more helpful for the prediction. Second, by minimizing the regularization loss, the model tends to give a high score for paths with high prior weight, and the prior weight is positively related to the average differential expression level.</p>
<p>Then, we evaluate the robustness and stability of the PathFinder. Concretely, we want the final path score distribution (ranking) learned from PathFinder to be stable and robust even if we slightly alter the training data. Since we randomly split the whole dataset for each repeated run, we can directly compare the learned score for each run to achieve our goal. Therefore, we plot the learned score for all paths, and all runs with paths are sorted by the average score. The results are shown in <xref ref-type="fig" rid="fig2">Figure 2C</xref>. For all three cell types, the learned scores are very stable across different runs, as paths with higher ranks always have higher scores. This means that, even if we slightly alter the training dataset, the PathFinder model can still output almost the same top k paths. The results successfully demonstrate the robustness of the PathFinder model for extracting important paths and constructing intra-communication networks.</p>
<p>Finally, we further evaluate the effectiveness of PathFinder on intra-cell signaling networks using the human cirrhosis cohort. Specifically, we run PathFinder on endothelial, macrophages, and T cells. The procedure is the same as the AD cohort. The average evaluation metric on the test set can be found in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table S2</xref> and the comparison of the average differential expression level of paths can be found in <xref ref-type="supplementary-material" rid="SM1">Supplementary Figures S1A,B</xref>.</p>
</sec>
<sec id="sec6">
<title>Core intra-cell signaling networks associated with the APOE4 genotype</title>
<p>In this section, we evaluate the intra-cell communication networks discovered by the PathFinder model. Particularly, we want to know whether the discovered networks can reveal the recent discovery of APOE4-driven AD or even indicate new findings. First, for all three cell types, the final networks are generated by first averaging the path score learned from five repeated runs and then ranking and selecting the top 300 paths from all paths to form the final networks. The generated networks for all three cell types are shown in <xref ref-type="fig" rid="fig3">Figure 3</xref>. Then, we perform the enrichment analysis on all generated networks using KEGG signaling pathways and gene ontology (GO) terms. The enrichment results are shown in <xref ref-type="fig" rid="fig4">Figure 4A</xref>. Based on the results, we find several key factors that are important to the development of APOE4-driven AD.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Intra-cell communication networks discovered by the PathFinder model for the AD cohort. <bold>(A)</bold> Excitatory neurons; <bold>(B)</bold> Microglia; <bold>(C)</bold> Astrocyte.</p>
</caption>
<graphic xlink:href="fncel-18-1369242-g003.tif"/>
</fig>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Analyses of the results. <bold>(A)</bold> KEGG and GO enrichment analyses on all discovered intra-networks. <bold>(B)</bold> Differential expression analysis. <bold>(C)</bold> Inter-cell communication networks. All ligands are from DEGs of the ligand cells. Receptors are marked as blue.</p>
</caption>
<graphic xlink:href="fncel-18-1369242-g004.tif"/>
</fig>
<sec id="sec7">
<title>Neuron inflammation</title>
<p>Numerous studies have shown that inflammation is highly activated and plays a key role in the progress of AD (<xref ref-type="bibr" rid="ref30">Rogers et al., 1996</xref>; <xref ref-type="bibr" rid="ref1">Akiyama et al., 2000</xref>; <xref ref-type="bibr" rid="ref17">Halliday et al., 2000</xref>; <xref ref-type="bibr" rid="ref27">Mathys et al., 2019</xref>). From the enrichment results, we can see that many inflammation-related pathways/GO terms are enriched across multiple cell types. For example, <italic>cytokine-mediated signaling pathway, cellular response to cytokine stimulus,</italic> and <italic>inflammatory mediator regulation of TRP channels</italic>. This result aligns with the findings of previous studies and further confirms that the existence of APOE4 in the astrocyte stimulates the inflammatory response. More specifically, several genes related to neuron inflammation are identified by PathFinder across multiple cell types. STAT1 and STAT3 are identified as hub genes connected to multiple targets in both the network of neurons and microglia. It has been shown that STAT1 plays a key role in regulating inflammatory responses and cellular death (<xref ref-type="bibr" rid="ref21">Hu et al., 2002</xref>; <xref ref-type="bibr" rid="ref6">Butturini et al., 2018</xref>). Moreover, the differential expression analysis (<xref ref-type="fig" rid="fig4">Figure 4B</xref>) reveals that STAT1 is highly differentially expressed in the TAFE4 group, which further confirms the important role of STAT1.</p>
</sec>
<sec id="sec8">
<title>Autophagy</title>
<p>In addition to inflammation, the <italic>Autophagy</italic> and <italic>Apoptotic signaling pathways</italic> are enriched in the neuron and the microglia. Autophagy is a lysosome-dependent, homeostatic process, in which organelles and proteins are degraded and recycled into energy. Autophagy has been linked to Alzheimer&#x2019;s disease pathogenesis through its merger with the endosomal-lysosomal system, which has been shown to play a role in the formation of amyloid-&#x03B2; plaques (<xref ref-type="bibr" rid="ref14">Funderburk et al., 2010</xref>). One hypothesis states that irregular autophagy stimulation results in increased amyloid-&#x03B2; production (<xref ref-type="bibr" rid="ref47">Yu et al., 2005</xref>). The existence of APOE4 may also affect the process of autophagy, leading to the accumulation of amyloid-&#x03B2; in the brain affected by AD. Particularly, CLU and FOXO1 genes are identified in the intra-network of microglia and astrocytes. CLU is one of the top AD candidate genes. One study shows that it is a causal gene of AD-affected hippocampal connectivity (<xref ref-type="bibr" rid="ref49">Zhang et al., 2015</xref>). Moreover, it is shown that CLU protein interacts with A&#x03B2;, reduces its aggregation, and protects against its toxic effects (<xref ref-type="bibr" rid="ref2">Beeg et al., 2016</xref>). Many studies have shown that FOXO1 induces autophagy in cardiomyocytes and cancer cells. FOXO1 has been identified as a gene that encodes a transcription factor involved in modulating autophagy in neurons (<xref ref-type="bibr" rid="ref42">Xu et al., 2011</xref>).</p>
</sec>
<sec id="sec9">
<title>Lipid transportation</title>
<p><italic>The regulation of lipid metabolic process and cellular response to lipids</italic> are enriched in the intra-communication network of all three cell types. The enriched genes included NR1D1, EGR1, and BRCA1. It has been proved that APOE4 is involved in the lipid transportation and metabolism (<xref ref-type="bibr" rid="ref35">Tindale et al., 2017</xref>). The existence of APOE4 in the astrocyte may disturb the brain lipid composition and thus affect the blood&#x2013;brain barrier (BBB) function (<xref ref-type="bibr" rid="ref9">Chew et al., 2020</xref>). All these results confirm the influence of APOE4 in the progress of AD and the dysfunction and death of the neuron.</p>
</sec>
<sec id="sec10">
<title>JAK-STAT signaling pathway</title>
<p>In the intra-communication network of the astrocyte, the receptor signaling pathway via JAK&#x2013;STAT is enriched with the corresponding genes: STAT3, SOCS3, HMGA2, and STAT1. The JAK&#x2013;STAT signaling pathway has been reported to be the inducer of astrocyte reactivity (<xref ref-type="bibr" rid="ref3">Ben Haim et al., 2015</xref>). The enrichment of the pathway indicates that the existence of APOE4 in astrocytes can influence the function of the JAK&#x2013;STAT signaling pathway, and the pathway in turn affects the activity of the astrocyte.</p>
</sec>
</sec>
<sec id="sec11">
<title>Evaluation of the intra-cell signaling networks on human cirrhosis</title>
<p>In this section, we further evaluate the intra-cell signaling networks on human cirrhosis on endothelial, macrophages, and T cells. The network extraction procedure is the same as the AD cohort. The gene expression and the pathway enrichment analysis result are shown in <xref ref-type="fig" rid="fig5">Figure 5</xref>. The final intra-networks for each cell type are shown in <xref ref-type="fig" rid="fig6">Figure 6</xref>. Before the analysis, we compare the extracted intra-cell network of cirrhosis with that obtained from the AD cohort. We merge the genes from all three cell types together for AD and cirrhosis separately and then compare the common genes from both cohorts. There are 269 genes from cirrhosis and 110 genes from AD. However, there are only 14 common genes, which demonstrates that PathFinder is disease- and expression-specific. We further explore the networks identified by the PathFinder model and their relationship with cirrhosis.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Analysis of the results for the cirrhosis cohort. <bold>(A)</bold> Differential expression analysis for all three cell types. <bold>(B)</bold> KEGG pathway enrichment analysis using the intra-cell networks discovered by PathFinder.</p>
</caption>
<graphic xlink:href="fncel-18-1369242-g005.tif"/>
</fig>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>Intra-cell communication networks discovered by the PathFinder model for the human cirrhosis cohort. <bold>(A)</bold> Endothelial; <bold>(B)</bold> Macrophages; <bold>(C)</bold> T cell.</p>
</caption>
<graphic xlink:href="fncel-18-1369242-g006.tif"/>
</fig>
<sec id="sec12">
<title>The role of immune cells in liver diseases</title>
<p>Immune cells and various signaling pathways play an important role in the pathogenesis of liver diseases. Gene CCR9 is activated in the intra-cell signaling network of both endothelial and T cells. Studies have found that, in a mouse model of NASH, the CCR9/CCL25 axis promotes the recruitment of macrophages and the formation of fibrosis, providing a new potential therapeutic target for NASH (<xref ref-type="bibr" rid="ref28">Morikawa et al., 2021</xref>). On the other hand, liver NKT cells accumulate in a CXCR6-dependent manner early after injury, exacerbating the inflammatory response and promoting the progression of liver fibrosis, suggesting that the CXCR6/CXCL16 pathway may be an effective target for the treatment of liver fibrosis (<xref ref-type="bibr" rid="ref41">Wehr et al., 2013</xref>). CXCR6 is discovered by PathFinder for the intra-cell signaling network of both endothelial and macrophages, which further confirms it. Additionally, &#x03B2;-arrestin1 (ARRB1) activated at the signaling network of all three cell types was reported to interact with pro-GDF15, promoting its cleavage and maturation in the Golgi apparatus, and the absence of ARRB1 significantly exacerbates hepatic steatosis, fibrosis, and inflammation (<xref ref-type="bibr" rid="ref50">Zhang et al., 2020</xref>).</p>
</sec>
<sec id="sec13">
<title>Liver fibrosis and its reversibility</title>
<p>The development of liver fibrosis is a complex and potentially reversible process. In its early stages, liver fibrosis may not immediately present severe symptoms but can eventually progress to cirrhosis and affect multiple organs. CREB is a highly activated gene discovered by PathFinder. Research has found that CREB, a molecule downstream of the cAMP signaling pathway, can serve as a therapeutic target for fibrosis (<xref ref-type="bibr" rid="ref26">Li et al., 2019</xref>). Furthermore, insulin-like growth factor 1 (IGF1) and its receptor IGF1R play a crucial role in liver health and function, primarily expressed in the liver tissue. Studies on liver fibrosis have revealed the core role of the IGF1/IGF1R signaling system in controlling the liver fibrosis process (<xref ref-type="bibr" rid="ref15">Gui et al., 2023</xref>). In the intra-cell signaling network of all three cell types identified by PathFinder, IGF1R is activated and further triggers target GNLY and HBEGF through FGFR3. Although there is not enough literature discussing their relationship with cirrhosis, exploiting the molecular mechanisms and functionality may provide new insights into studying cirrhosis and be helpful in developing more effective treatments to solve liver disease problems.</p>
</sec>
<sec id="sec14">
<title>Liver disease transition process</title>
<p>In the intra-cell signaling networks identified by PathFinder, genes EGR1 and ERBB3 are highly activated. In the liver disease transition process, chronic hepatitis and cirrhosis are major factors leading to the majority of hepatocellular carcinomas (HCC). Concurrently, non-alcoholic fatty liver disease (NAFLD) has become a global epidemic, not only associated with the development of metabolic syndrome but also regarded as a pathway leading to severe liver diseases such as cirrhosis and hepatocellular carcinoma. In this transition process, EGR1 has been discovered as a key regulator of NAFLD, presenting potential as a potent target for intervening in NAFLD (<xref ref-type="bibr" rid="ref16">Guo et al., 2023</xref>). Additionally, research has identified ERBB3 as a potential serum marker for early HCC in patients with chronic hepatitis and cirrhosis (<xref ref-type="bibr" rid="ref29">Nasiri et al., 2020</xref>). A deeper understanding of the mechanisms underlying liver disease transition will provide insights into therapeutic strategies for related diseases.</p>
</sec>
</sec>
<sec id="sec15">
<title>Core multi-cell inter-cell communication networks associated with the APOE4 genotype</title>
<p>To further understand the complex signaling flow and mechanism behind the APOE4 and AD pathology, we further generate inter-cell communication networks between three different cell types using PathFinder, as shown in <xref ref-type="fig" rid="fig4">Figure 4C</xref>. First, we can see that, compared to astrocytes, microglia have many more interactions with neurons. This may indicate that the existence of APOE4 in the astrocyte may activate the functionality of microglia and then cause abnormal activities in the neurons. Among all interactions, several interesting interactions appeared in the results. First, the MIF secreted by the astrocyte interacts with the EGFR in the neuron and follows downstream signaling. The MIF is a well-known proinflammatory cytokine that promotes the production of other immune mediators. Increased expression of MIF can contribute to chronic neuroinflammation and neurodegeneration (<xref ref-type="bibr" rid="ref34">Tavassoly et al., 2020</xref>). EGFR is a potential target for treating AD-induced memory loss (<xref ref-type="bibr" rid="ref52">Zhu et al., 2011</xref>; <xref ref-type="bibr" rid="ref39">Wang et al., 2012</xref>). The increased expression level of MIF could be the signature of activated astrocytes, and the MIF further triggers the expression of EGFR and the subsequent downstream network in the neuron, which contributes to neuron inflammation and degeneration.</p>
<p>In addition to MIF in astrocytes, many ligands for receptor EGFR are also identified in microglia, including ICAM1, IGF1, HLA-A, CNTN2, PCDH15, FLRT2, TAC3, PTN, and PTPRC. The downregulation of PTPRC is reported to contribute to the overproduction of A&#x03B2; and neuron loss (<xref ref-type="bibr" rid="ref4">Brito-Moreira et al., 2017</xref>). Another interaction is the <italic>NLGN1</italic> gene which is expressed in neurons that interact with the <italic>NRXN1</italic> gene in the astrocyte. The amyloid-&#x03B2; oligomers are synaptotoxins that build up in the brains of patients and are thought to contribute to the memory impairment in AD. It has been shown that the interaction of neurexins (Nrxs) and neuroligins (NLs) is critical for synapse structure, stability, and function (<xref ref-type="bibr" rid="ref36">Tyzack et al., 2017</xref>). The dysregulation of the interaction between Nrxs and NLs may contribute to the formation of amyloid-&#x03B2; oligomer. The <italic>EFNA5</italic> in the neuron is upregulated in the neuron and interacts with <italic>EPHB1</italic> and downstream <italic>STAT3</italic> signaling in the astrocyte. This interaction is closely related to the ephrin-B1-mediated stimulation. The analysis has shown that the ephrin-B1-mediated stimulation induces a protective and anti-inflammatory signature in astrocytes and can be regarded as &#x201C;help-me&#x201D; signal of neurons that failed in early amyotrophic lateral sclerosis (ALS) (<xref ref-type="bibr" rid="ref25">Lambert et al., 2018</xref>). Such signals could also play an important role in triggering inflammation and neuron degeneration in the CNS system.</p>
</sec>
</sec>
<sec id="sec16">
<title>Conclusion and discussion</title>
<p>In this study, we propose PathFinder, which is the first deep-learning model with a graph transformer that can be used to extract both intra- and inter-cell communication networks using scRNA-seq data. Through a case study using an AD scRNA-seq dataset from mice, we evaluate the effectiveness of PathFinder from multiple perspectives. First, the quantitative analysis confirms that PathFinder performs well in separating cells from different conditions by leveraging the differences in expression patterns in the signaling paths. Furthermore, the learned path score is robust and consistent in repeated runs. We further evaluate the correctness of extracted networks through extensive literature searches. The resulting network aligned well with many recent discoveries on the AD pathology, which further proved the effectiveness of the proposed PathFinder. Additionally, the current version of PathFinder has a few potential limitations to be improved in future studies. First, it requires many samples in training to produce reasonable results. Second, it relies on the pre-defined paths from the database to learn and extract meaningful patterns and is unable to discover new signaling flows. Third, currently, it is hard to validate the discovered signaling pathway quantitatively as there is no existing benchmark for conducting this process. All these limitations warrant further investigation. For example, we can construct a common benchmark to evaluate the performance of all signaling network inference methods quantitatively. We will also improve the model in our future work.</p>
</sec>
<sec sec-type="methods" id="sec17">
<title>Methodology</title>
<sec id="sec18">
<title>Gene-gene interaction database collection and processing</title>
<p>To construct the gene&#x2013;gene interaction database, the raw interaction data were collected from NicheNet software (<xref ref-type="bibr" rid="ref5">Browaeys et al., 2020</xref>). The raw interaction data were divided into three types: ligand-receptor network, signaling network, and gene-regulation network. The original network contained 12,019 interactions/1,430 genes, 12,780 interactions/8,278 genes, and 11,231 interactions/8,450 genes, respectively. To construct the intra- and inter-network database, the data were further processed by the following steps.</p>
<p>First, ligands and receptors were collected by gathering the source and target of the ligand-receptor network. There were a total of 688 ligands and 857 receptors. Then, interactions in the ligand-receptor network were divided into two types. If one interaction exists in both directions in the database, we labeled it as bidirectional. Otherwise, we labeled it as directional. After processing, there were 11,880 directional interactions and 139 bidirectional interactions.</p>
<p>The gene-regulation network was processed as follows. First, 1,639 transcriptional factors (TFs) were collected (<xref ref-type="bibr" rid="ref12">Fan et al., 2021</xref>). For convenience, TFs that exist in either the ligand or receptor list were removed. Finally, 1,632 TFs were collected. Then, three different types of regulation were collected in the gene-regulation interaction network, which are ligand regulation, receptor regulation, and TF regulation. To label each interaction into one of three types, all the interactions in the network were removed if the source gene was not in the ligand, receptor, or TF list. Then, the interactions were labeled based on the type of source (e.g., if the source of interaction is a receptor, we label it as receptor regulation). After processing, there were 1,329 ligand-regulation interactions, 272 receptor-regulation interactions, and 6,706 TF-regulation interactions.</p>
<p>Finally, the signaling network was processed as follows. First, all interactions were removed if they existed in either the ligand-receptor or the gene-regulation network. Then, the interactions were further divided into receptor-TF, receptor-signaling, signaling-TF, and signaling-signaling. To be more specific, if the source of interaction is in the receptor list and the target of interaction is in the TF list, the interaction was labeled as receptor-TF. If the source of interaction is in the receptor list and the target is not in the TF list, the interaction was labeled as receptor-signaling. If the source of interaction is not in the receptor list and the target of interaction is in the TF list, the interaction was labeled as signaling-TF. If neither the source nor target of interaction is in the TF and receptor lists, the interaction was labeled as signaling-signaling. The interactions that cannot be classified into one of the specified groups were removed for convenience. Finally, there are 31 receptor-TF interactions, 524 receptor-signaling interactions, 975 signaling-TF interactions, and 9,745 signaling-signaling interactions.</p>
</sec>
<sec id="sec19">
<title>Notations and terminologies</title>
<sec id="sec20">
<title>Terminologies</title>
<p>An embedding or a representation is a vector of size <inline-formula>
<mml:math id="M3">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> that represents an entity, such as a gene or a path. The input embedding is the embedding input to the model, the hidden embedding is the embedding output by the middle layers of the model, and the output embedding is the embedding output by the model. With the final output embedding for an entity, we can do the classification or regression by passing it to a logistic regression or linear regression layer. An encoding is a function that transforms an entity to the embedding. Typically, the goal of a deep learning or machine learning model is to learn a model that can take the input embedding of the entity we want to predict and output the output embedding which is more reliable and powerful for the prediction. A single neural network layer will contain one or multiple trainable weight matrices. These matrices are responsible for transforming the input embedding into the output embedding. They will be updated and refined by the backward propagation and gradient descent used in the neural network.</p>
</sec>
<sec id="sec21">
<title>Notations</title>
<p>A gene graph is denoted as <inline-formula>
<mml:math id="M4">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M5">
<mml:mi>V</mml:mi>
</mml:math>
</inline-formula> is the set of gene nodes with <inline-formula>
<mml:math id="M6">
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M7">
<mml:mi>E</mml:mi>
</mml:math>
</inline-formula> is the set of edges and <inline-formula>
<mml:math id="M8">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mo>&#x2286;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The node embedding set is denoted by <inline-formula>
<mml:math id="M9">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:mo>&#x2026;</mml:mo>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M10">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the embedding vector of the node <inline-formula>
<mml:math id="M11">
<mml:mi>u</mml:mi>
</mml:math>
</inline-formula>. The graph structure is defined by an adjacency matrix <inline-formula>
<mml:math id="M12">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M13">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> indicate there is an edge from the node <inline-formula>
<mml:math id="M14">
<mml:mi>u</mml:mi>
</mml:math>
</inline-formula> to node <inline-formula>
<mml:math id="M15">
<mml:mi>v</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M16">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> otherwise. Furthermore, a set of paths sampled from a graph is denoted as <inline-formula>
<mml:math id="M17">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:mspace width="0.25em"/>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:mo>&#x2026;</mml:mo>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:mspace width="0.25em"/>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M18">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the <inline-formula>
<mml:math id="M19">
<mml:mi>m</mml:mi>
</mml:math>
</inline-formula>-th path, which is a list to store the nodes of the path in order. Paths can have different lengths, and we denote the length of path <inline-formula>
<mml:math id="M20">
<mml:mi>m</mml:mi>
</mml:math>
</inline-formula> be <inline-formula>
<mml:math id="M21">
<mml:mrow>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
</sec>
<sec id="sec22">
<title>Preliminary of transformer and Graphormer</title>
<p>The transformer is a powerful architecture in the deep learning field. It consists of multiple transformer layers. Each transformer layer has two parts: a multi-head self-attention and a point-wise feed-forward network (FFN) with residual connection applied between each part. Let <inline-formula>
<mml:math id="M22">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> be the embedding of nodes in layer <inline-formula>
<mml:math id="M23">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M24">
<mml:mrow>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the embedding of the node <inline-formula>
<mml:math id="M25">
<mml:mi>u</mml:mi>
</mml:math>
</inline-formula> in layer <inline-formula>
<mml:math id="M26">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, the computation of multi-head self-attention is:</p>
<disp-formula id="E1">
<mml:math id="M27">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>V</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="E2">
<mml:math id="M28">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:msup>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msup>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="E3">
<mml:math id="M29">
<mml:mrow>
<mml:msup>
<mml:mi>O</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:mo>&#x2026;</mml:mo>
<mml:mi mathvariant="normal">,</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M30">
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M31">
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are all trainable weight matrices, <inline-formula>
<mml:math id="M32">
<mml:mi>h</mml:mi>
</mml:math>
</inline-formula> is the number of heads, <inline-formula>
<mml:math id="M33">
<mml:mrow>
<mml:msup>
<mml:mi>O</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the output from the multi-head self-attention in layer <inline-formula>
<mml:math id="M34">
<mml:mi>l</mml:mi>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M35">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the concatenation function to combine multiple vectors into a single large vector. For simplicity, we let <inline-formula>
<mml:math id="M36">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The output <inline-formula>
<mml:math id="M37">
<mml:mrow>
<mml:msup>
<mml:mi>O</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> will then be fed into a point-wise feed-forward network. The computation of the point-wise feed-forward network is:</p>
<disp-formula id="E4">
<mml:math id="M38">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>l</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M39">
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M40">
<mml:mrow>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M41">
<mml:mrow>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are all trainable weight matrices and bias vectors. Notice that here we slightly modify the hidden size of the feed-forward network of the original model. The embedding of each node <inline-formula>
<mml:math id="M42">
<mml:mrow>
<mml:msubsup>
<mml:mi>O</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> will be input into this FFN for further processing.</p>
<p>However, the vanilla transformer cannot be used directly on graph-structured data as it lacks a critical part for encoding the topological information into the model. To deal with this issue, Graphormer introduced several novel encodings into the model. Specifically, these are centrality encoding, spatial encoding, and edge encoding. The centrality encoding is used to embed the graph centrality information into the model. Given the input data <inline-formula>
<mml:math id="M43">
<mml:mi>X</mml:mi>
</mml:math>
</inline-formula>, the computation of centrality encoding is:</p>
<disp-formula id="E5">
<mml:math id="M44">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>Z</mml:mi>
<mml:mo>&#x2212;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mo>&#x2212;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>Z</mml:mi>
<mml:mo>+</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mo>+</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where the <inline-formula>
<mml:math id="M45">
<mml:mrow>
<mml:msup>
<mml:mi>Z</mml:mi>
<mml:mo>&#x2212;</mml:mo>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>Z</mml:mi>
<mml:mo>+</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are both trainable embedding vectors and <inline-formula>
<mml:math id="M46">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mo>&#x2212;</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M47">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mo>+</mml:mo>
</mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>n</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are the functions to compute the in-degree and out-degree of each node in the graph <inline-formula>
<mml:math id="M48">
<mml:mi>G</mml:mi>
</mml:math>
</inline-formula>. The spatial and edge encodings are used to encode the graph structure into the model. With the spatial and edge encodings, the self-attention is revised as:</p>
<disp-formula id="E6">
<mml:math id="M49">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mi>&#x03D5;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msup>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M50">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a trainable embedding vector that encodes the spatial information at head <inline-formula>
<mml:math id="M51">
<mml:mi>i</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M52">
<mml:mrow>
<mml:mi>&#x03D5;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mspace width="thickmathspace"/>
</mml:mrow>
</mml:math>
</inline-formula> is the function to compute the shortest path length between each pair of nodes. If two nodes are not connected, a special value will be used. <inline-formula>
<mml:math id="M53">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the edge embedding and <inline-formula>
<mml:math id="M54">
<mml:mrow>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M55">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the edge feature of the <inline-formula>
<mml:math id="M56">
<mml:mi>n</mml:mi>
</mml:math>
</inline-formula>-th edge in the shortest path between node <inline-formula>
<mml:math id="M57">
<mml:mi>u</mml:mi>
</mml:math>
</inline-formula> and node <inline-formula>
<mml:math id="M58">
<mml:mi>v</mml:mi>
</mml:math>
</inline-formula> and the <inline-formula>
<mml:math id="M59">
<mml:mrow>
<mml:msubsup>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the trainable weight vector of the <inline-formula>
<mml:math id="M60">
<mml:mi>n</mml:mi>
</mml:math>
</inline-formula>-th edge of head <inline-formula>
<mml:math id="M61">
<mml:mi>i</mml:mi>
</mml:math>
</inline-formula>. Note that both the spatial and edge encodings are unique across different layers.</p>
</sec>
<sec id="sec23">
<title>Architecture of PathFinder</title>
<p>The PathFinder model consists of three components, namely, the node encoder, path encoder, and graph encoder. The overall architecture of the PathFinder model is shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>, lower. The rationale behind PathFinder is that, if a model can identify disease cells from normal cells, it must learn useful knowledge from the gene expression profile to help it make that prediction. In PathFinder, we introduce the path encoder to let the model make the prediction based on the importance of the signaling paths with their corresponding expression. In this way, if the model can make a reasonable prediction, it must have the ability to distinguish differentially expressed signaling paths from the other paths, and that is exactly what we are looking for. Furthermore, since the paths are pre-defined from the physical interaction database in a biologically meaningful way, the extracted signaling paths are inherently biologically meaningful. PathFinder can be seen as a simulator to simulate the signaling path in the cell and use it to make the prediction. Below, we discuss each component in detail.</p>
<sec id="sec24">
<title>Node encoder</title>
<p>The architecture of the node encoder is similar to the Graphormer, which stacks <inline-formula>
<mml:math id="M62">
<mml:mi>L</mml:mi>
</mml:math>
</inline-formula> transformer layers with centrality encoding, spatial encoding, and edge encoding. The input to PathFinder is the expression value of each gene in a cell sample. However, we made several modifications to the original architecture. First, the hidden sizes in the point-wise feed-forward network are all <inline-formula>
<mml:math id="M63">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in both layers for simplicity. Second, the edge encoding in PathFinder is modified. In the original Graphormer, the edge encoding is computed by all the edges in the shortest path between two nodes, which can capture long-range information in the graph. However, localized features in the graph will be smoothed out in this manner. Instead, PathFinder aims for the node embeddings learned from the node encoder to focus on the localized information in the graph. Therefore, direct edge encoding is proposed. The direct edge encoding is computed by:</p>
<disp-formula id="E7">
<mml:math id="M64">
<mml:mrow>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M65">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the edge feature of the edge between node <inline-formula>
<mml:math id="M66">
<mml:mi>u</mml:mi>
</mml:math>
</inline-formula> and node <inline-formula>
<mml:math id="M67">
<mml:mi>v</mml:mi>
</mml:math>
</inline-formula>. If there is no edge between two nodes, the direct edge encoding is set to a special vector for simplicity. By doing this, the node encoder becomes adept at learning node embeddings that capture localized information. Finally, the spatial encoding is also revised in PathFinder. Since here the graph structure is identical for all samples and node order invariance automatically holds, we can learn a specific spatial encoding for each pair of nodes. Therefore, we design the node index encoding in the PathFinder model. The node index encoding is not computed from the length of the shortest path between each pair of nodes but is directly learned for each pair of genes, namely, for each pair of genes, a unique encoding is learned for each head in each layer of the node encoder.</p>
</sec>
<sec id="sec25">
<title>Path encoder</title>
<p>Furthermore, the path encoder is responsible for learning gene signaling path embedding, utilizing the node embedding in the graph and the pre-defined path list of the graph. The details of the pre-defined path list are illustrated below. Suppose there are <inline-formula>
<mml:math id="M68">
<mml:mi>p</mml:mi>
</mml:math>
</inline-formula> unique paths in the path list <inline-formula>
<mml:math id="M69">
<mml:mi>P</mml:mi>
</mml:math>
</inline-formula>, where the length of the <inline-formula>
<mml:math id="M70">
<mml:mi>m</mml:mi>
</mml:math>
</inline-formula>-th path is <inline-formula>
<mml:math id="M71">
<mml:mrow>
<mml:msub>
<mml:mi>l</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the total number of nodes in the path list is <inline-formula>
<mml:math id="M72">
<mml:mi>k</mml:mi>
</mml:math>
</inline-formula> (counting repeated nodes across different paths). Denote the node embedding output from layer <inline-formula>
<mml:math id="M73">
<mml:mi>l</mml:mi>
</mml:math>
</inline-formula> as <inline-formula>
<mml:math id="M74">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, we first learn a path-specific embedding through:</p>
<disp-formula id="E8">
<mml:math id="M75">
<mml:mrow>
<mml:msubsup>
<mml:mi>U</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M76">
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M77">
<mml:mrow>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>u</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are a trainable weight matrix and bias vector, <inline-formula>
<mml:math id="M78">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a function to reorder and scatter the nodes in the graph into the order of the pre-defined path list. For example, suppose there are embeddings of five genes output from the node encoder. That is, <inline-formula>
<mml:math id="M79">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mn>5</mml:mn>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. We label each gene from 1 to 5. Suppose there are two paths. The first path is 1&#x2009;&#x2192;&#x2009;3&#x2009;&#x2192;&#x2009;4. The second path is 2&#x2009;&#x2192;&#x2009;3&#x2009;&#x2192;&#x2009;4&#x2009;&#x2192;&#x2009;5. Then, the <inline-formula>
<mml:math id="M80">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> will output a new matrix with 7 rows, where each row represents a gene in a path. For instance, the third row is <inline-formula>
<mml:math id="M81">
<mml:mrow>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mn>4</mml:mn>
<mml:mi>l</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> since it is the third gene in the first path. <inline-formula>
<mml:math id="M82">
<mml:mrow>
<mml:msup>
<mml:mi>U</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the learned path-specific embedding. For convenience, we denote <inline-formula>
<mml:math id="M83">
<mml:mrow>
<mml:msubsup>
<mml:mi>U</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> as the embedding of <inline-formula>
<mml:math id="M84">
<mml:mi>i</mml:mi>
</mml:math>
</inline-formula>-th node in the <inline-formula>
<mml:math id="M85">
<mml:mi>m</mml:mi>
</mml:math>
</inline-formula>-th path. Then, path positional and path edge encodings are introduced to encode additional information for all paths. Let <inline-formula>
<mml:math id="M86">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>U</mml:mi>
<mml:mo>&#x00AF;</mml:mo>
</mml:mover>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> be the resulting embedding after applying these special encodings. We have:</p>
<disp-formula id="E9">
<mml:math id="M87">
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>U</mml:mi>
<mml:mo>&#x00AF;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mi>U</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M88">
<mml:mrow>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>l</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the learnable positional encoding vector and its value only depends on the position <inline-formula>
<mml:math id="M89">
<mml:mi>i</mml:mi>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M90">
<mml:mrow>
<mml:msubsup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the learnable edge encoding to encode the edge type between <inline-formula>
<mml:math id="M91">
<mml:mi>i</mml:mi>
</mml:math>
</inline-formula>-th node and <inline-formula>
<mml:math id="M92">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>-th node. Then, the score of each node within the path is computed by:</p>
<disp-formula id="E10">
<mml:math id="M93">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mi>tanh</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>U</mml:mi>
<mml:mo>&#x00AF;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="E11">
<mml:math id="M94">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>S</mml:mi>
<mml:mo>&#x00AF;</mml:mo>
</mml:mover>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M95">
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M96">
<mml:mrow>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M97">
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M98">
<mml:mrow>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are all trainable parameters. <inline-formula>
<mml:math id="M99">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the softmax function working within each path. The <inline-formula>
<mml:math id="M100">
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the final <inline-formula>
<mml:math id="M101">
<mml:mi>r</mml:mi>
</mml:math>
</inline-formula> sets of importance scores for each node in each path. We let <inline-formula>
<mml:math id="M102">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>u</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for simplicity. After we obtain <inline-formula>
<mml:math id="M103">
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, the path embedding is computed by:</p>
<disp-formula id="E12">
<mml:math id="M104">
<mml:mrow>
<mml:msup>
<mml:mi>P</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>&#x2217;</mml:mo>
<mml:msup>
<mml:mover accent="true">
<mml:mi>U</mml:mi>
<mml:mo>&#x00AF;</mml:mo>
</mml:mover>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p><inline-formula>
<mml:math id="M105">
<mml:mo>&#x2217;</mml:mo>
</mml:math>
</inline-formula> is the point-wise product working on each set of importance scores. That is, for each set of importance scores, we do a point-wise product of that set of scores and <inline-formula>
<mml:math id="M106">
<mml:mrow>
<mml:msup>
<mml:mi>U</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, which results in total <inline-formula>
<mml:math id="M107">
<mml:mi>r</mml:mi>
</mml:math>
</inline-formula> sets. The <inline-formula>
<mml:math id="M108">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> function computes the summation over each path. <inline-formula>
<mml:math id="M109">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the function to flatten the embedding of all sets. <inline-formula>
<mml:math id="M110">
<mml:mrow>
<mml:msup>
<mml:mi>P</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the final path embedding in the layer <inline-formula>
<mml:math id="M111">
<mml:mi>l</mml:mi>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="sec26">
<title>Graph encoder</title>
<p>In the original Graphormer, the graph embedding is learned by introducing a special node and letting it connect to all the nodes in the graph. After forwarding, the embedding of that special node is regarded as the graph embedding for the graph-level task. In PathFinder, our goal is to learn the graph embedding from the path embedding. Meanwhile, we aim to extract the important paths from the model after training it for the graph-level task. To simultaneously achieve both goals, the graph encoder is proposed. The graph encoder consists of two parts. The first part is a trainable path weight and the sigmoid function to assign each path with different scores. The second part is the jumping knowledge network to combine the graph embedding in each layer and compute the final embedding.</p>
<p>In PathFinder, the graph embedding is learned by integrating all the path embeddings from each layer, which requires an importance score for each path. Normally, the score is computed based on one sample. However, such a score is not robust and may vary a lot even with a minor variation of the path embedding (<xref ref-type="bibr" rid="ref44">Xu et al., 2018</xref>; <xref ref-type="bibr" rid="ref8">Chen et al., 2019</xref>; <xref ref-type="bibr" rid="ref12">Fan et al., 2021</xref>). To avoid the issue and learn a robust importance score across the whole dataset, the trainable path score <inline-formula>
<mml:math id="M112">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is introduced. <inline-formula>
<mml:math id="M113">
<mml:mi>M</mml:mi>
</mml:math>
</inline-formula> is shared across all samples and layers and learned through backpropagation. The path importance score is computed by:</p>
<disp-formula id="E13">
<mml:math id="M114">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>M</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M115">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the importance score for each path. Then, the graph embedding of layer <inline-formula>
<mml:math id="M116">
<mml:mi>l</mml:mi>
</mml:math>
</inline-formula> is computed by:</p>
<disp-formula id="E14">
<mml:math id="M117">
<mml:mrow>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi>I</mml:mi>
<mml:msup>
<mml:mi>P</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M118">
<mml:mrow>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the graph embedding of layer <inline-formula>
<mml:math id="M119">
<mml:mi>l</mml:mi>
</mml:math>
</inline-formula>. The final step of the graph encoder is to integrate the graph embedding of each layer and learn a final embedding. Here, we utilize the idea of JumpingKnowledge network (<xref ref-type="bibr" rid="ref9001">Xu et al., 2018</xref>) and compute the final graph embedding by:</p>
<disp-formula id="E15">
<mml:math id="M120">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mn>1</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>g</mml:mi>
<mml:mi>L</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M121">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the max pooling function and <inline-formula>
<mml:math id="M122">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the final graph embedding learned by PathFinder. Finally, the graph embedding is used to classify the cell sample into the corresponding condition (control/test). The prediction is a typical binary prediction computed by:</p>
<disp-formula id="E16">
<mml:math id="M123">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M124">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x00D7;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the trainable projection matrix and <inline-formula>
<mml:math id="M125">
<mml:mi>p</mml:mi>
</mml:math>
</inline-formula> is the predicted distribution.</p>
</sec>
</sec>
<sec id="sec27">
<title>Training and regularization of PathFinder</title>
<p>To train the PathFinder model, the negative log-likelihood (NLL) loss is applied. Let the <inline-formula>
<mml:math id="M126">
<mml:mrow>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> be the predicted probability of the true condition of cell <inline-formula>
<mml:math id="M127">
<mml:mi>i</mml:mi>
</mml:math>
</inline-formula>, then the NLL loss is computed by:</p>
<disp-formula id="E17">
<mml:math id="M128">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M129">
<mml:mi>N</mml:mi>
</mml:math>
</inline-formula> is the number of cells in the dataset. Meanwhile, to regularize the training of the model and learn biological meaningful paths from the model, the regularization term is introduced to the path score <inline-formula>
<mml:math id="M130">
<mml:mi>M</mml:mi>
</mml:math>
</inline-formula>. Intuitively, the path that has a higher total fold change should have a higher path score. Furthermore, we designed three different regularization terms to generate different important paths by introducing the prior path score. Specifically, these three regularizations are upregulated path, downregulated path, and differentially expressed path regularization. Let the <inline-formula>
<mml:math id="M131">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>m</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> be the log fold change of gene <inline-formula>
<mml:math id="M132">
<mml:mi>j</mml:mi>
</mml:math>
</inline-formula> in path <inline-formula>
<mml:math id="M133">
<mml:mi>m</mml:mi>
</mml:math>
</inline-formula>, then the prior path score is computed by:</p>
<disp-formula id="E18">
<mml:math id="M134">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mi>N</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>j</mml:mi>
</mml:munder>
<mml:mi>f</mml:mi>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>m</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="E19">
<mml:math id="M135">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mi>N</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>j</mml:mi>
</mml:munder>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>f</mml:mi>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>m</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="E20">
<mml:math id="M136">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>deg</mml:mi>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mi>N</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>j</mml:mi>
</mml:munder>
<mml:mo>|</mml:mo>
<mml:mi>f</mml:mi>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mi>j</mml:mi>
<mml:mi>m</mml:mi>
</mml:msubsup>
<mml:mo>|</mml:mo>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M137">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M138">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M139">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>deg</mml:mi>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> are the prior path scores for upregulated, downregulated, and differentially expressed regularization, respectively. <inline-formula>
<mml:math id="M140">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the min-max normalization across all paths. Suppose we use the upregulated prior score; the regularization loss is then computed by:</p>
<disp-formula id="E21">
<mml:math id="M141">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mo>&#x2225;</mml:mo>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The final loss is:</p>
<disp-formula id="E22">
<mml:math id="M142">
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi>&#x03B2;</mml:mi>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M143">
<mml:mi>&#x03B2;</mml:mi>
</mml:math>
</inline-formula> is the weight of the regularization term.</p>
</sec>
<sec id="sec28">
<title>Predefined path list</title>
<p>To train the PathFinder model, the path list needs to be defined before the training. Given the collected gene&#x2013;gene interaction database and the input variable gene list, we designed several choices to generate a predefined path list. The first choice is the shortest path. For this choice, the shortest path between each pair of genes in the dataset will be computed and collected given the gene&#x2013;gene interaction network. The second choice is to generate all the possible paths that start from the receptor and end in the target, which can also be performed using the gene&#x2013;gene interaction database. To constrain the path, the minimum length of the path is set to be 3 unless the path is a receptor regulation interaction. The maximum length of the path is set to be 10.</p>
</sec>
<sec id="sec29">
<title>Experimental details</title>
<p>We conduct experiments to validate the effectiveness of PathFinder on TAFE_ex, TAFE_mic, and TAFE_ast cell sample datasets. For each dataset, we randomly split datasets into train/validation/test sets with a ratio of 0.7/0.1/0.2. We train the model using the train set and validate the performance of the model using the validation set. Finally, we save the model that achieves the best performance on the validation set and report the performance of the saved model on the test set. We use the area under the curve (AUC) as the performance metric for selecting the best model. We repeat experiments on each dataset five times (with a different random split applied to the dataset each time) and report the mean results and the standard deviation. The model and training hyperparameters are described as follows: We set the number of layers as 6 and the hidden size <inline-formula>
<mml:math id="M144">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as 128. We set both the number of heads and the number of score sets <inline-formula>
<mml:math id="M145">
<mml:mi>r</mml:mi>
</mml:math>
</inline-formula> as 8. For each experiment, we set the number of training epochs as 30, the learning rate as 0.0005, the dropout rate as 0.1, the regularization weight <inline-formula>
<mml:math id="M146">
<mml:mi>&#x03B2;</mml:mi>
</mml:math>
</inline-formula> as 0.1 for TAFE_ex and TAFE_mic, and 1.0 for TAFE_ast.</p>
</sec>
<sec id="sec30">
<title>Generation of the intra- and inter-cell communication network</title>
<p>After the PathFinder model is trained, the generation of an intra-cell communication network is straightforward. Concretely, we first average the path weight learned from five repeated experiments to get the final path weights. Furthermore, the top <inline-formula>
<mml:math id="M147">
<mml:mi>K</mml:mi>
</mml:math>
</inline-formula> paths are extracted and combined to generate the intra-cell communication network. The generation of the inter-cell communication network is as follows. Let the cell that provides ligands be the ligand cell and the cell providing receptors be the receptor cell. The intra-cell communication network is first generated. Then, the ligands of the ligand cell and receptors of the receptor cell will be extracted from their respective intra-networks. Then, the ligand&#x2013;receptor pairs are selected given the ligand&#x2013;receptor database. Finally, the kept pairs will be linked and the inter-network is generated.</p>
</sec>
</sec>
<sec sec-type="data-availability" id="sec32">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found at: <ext-link xlink:href="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE164507" ext-link-type="uri">https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE164507</ext-link>. The source code of PathFinder is publicly accessible on GitHub: <ext-link xlink:href="https://github.com/fuhaililab/PathFinder" ext-link-type="uri">https://github.com/fuhaililab/PathFinder</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="sec33">
<title>Author contributions</title>
<p>JF: Data curation, Formal analysis, Methodology, Software, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. MP: Writing &#x2013; review &#x0026; editing. GL: Writing &#x2013; review &#x0026; editing. PP: Writing &#x2013; review &#x0026; editing. YC: Methodology, Writing &#x2013; review &#x0026; editing. FL: Conceptualization, Funding acquisition, Methodology, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. HS: Writing &#x2013; original draft, Formal analysis, Validation.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="sec34">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This study was partially supported by NIA R56AG065352 (to Li), 1R21AG078799-01A1 (to Li), and 1RM1NS132962-01 (to Dickson/Sardiello/Cooper/Li).</p>
</sec>
<sec sec-type="COI-statement" id="sec35">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="sec36">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec37">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fncel.2024.1369242/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fncel.2024.1369242/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Figure_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>SUPPLEMENTARY Figure S1</label>
<caption>
<p>Additional evaluation results. <bold>(A)</bold> Comparison of the differential expression level between paths identified by PathFinder and the rest in cirrhosis cohort. <bold>(B)</bold> The learned path scores of PathFinder on different runs on cirrhosis cohort.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Data_Sheet_1.docx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Akiyama</surname> <given-names>H.</given-names></name> <name><surname>Barger</surname> <given-names>S.</given-names></name> <name><surname>Barnum</surname> <given-names>S.</given-names></name> <name><surname>Bradt</surname> <given-names>B.</given-names></name> <name><surname>Bauer</surname> <given-names>J.</given-names></name> <name><surname>Cole</surname> <given-names>G. M.</given-names></name> <etal/></person-group>. (<year>2000</year>). <article-title>Inflammation and Alzheimer&#x2019;s disease</article-title>. <source>Neurobiol. Aging</source> <volume>21</volume>, <fpage>383</fpage>&#x2013;<lpage>421</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0197-4580(00)00124-X</pub-id>, PMID: <pub-id pub-id-type="pmid">10858586</pub-id></citation></ref>
<ref id="ref2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Beeg</surname> <given-names>M.</given-names></name> <name><surname>Stravalaci</surname> <given-names>M.</given-names></name> <name><surname>Romeo</surname> <given-names>M.</given-names></name> <name><surname>Carr&#x00E1;</surname> <given-names>A. D.</given-names></name> <name><surname>Cagnotto</surname> <given-names>A.</given-names></name> <name><surname>Rossi</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Clusterin binds to A&#x03B2;1&#x2013;42 oligomers with high affinity and interferes with peptide aggregation by inhibiting primary and secondary nucleation&#x002A;</article-title>. <source>J. Biol. Chem.</source> <volume>291</volume>, <fpage>6958</fpage>&#x2013;<lpage>6966</lpage>. doi: <pub-id pub-id-type="doi">10.1074/jbc.M115.689539</pub-id>, PMID: <pub-id pub-id-type="pmid">26884339</pub-id></citation></ref>
<ref id="ref3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ben Haim</surname> <given-names>L.</given-names></name> <name><surname>Ceyz&#x00E9;riat</surname> <given-names>K.</given-names></name> <name><surname>Sauvage</surname> <given-names>M. A. C.-d.</given-names></name> <name><surname>Aubry</surname> <given-names>F.</given-names></name> <name><surname>Auregan</surname> <given-names>G.</given-names></name> <name><surname>Guillermier</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>The JAK/STAT3 pathway is a common inducer of astrocyte reactivity in Alzheimer&#x2019;s and Huntington&#x2019;s diseases</article-title>. <source>J. Neurosci.</source> <volume>35</volume>, <fpage>2817</fpage>&#x2013;<lpage>2829</lpage>. doi: <pub-id pub-id-type="doi">10.1523/JNEUROSCI.3516-14.2015</pub-id>, PMID: <pub-id pub-id-type="pmid">25673868</pub-id></citation></ref>
<ref id="ref4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brito-Moreira</surname> <given-names>J.</given-names></name> <name><surname>Lourenco</surname> <given-names>M. V.</given-names></name> <name><surname>Oliveira</surname> <given-names>M. M.</given-names></name> <name><surname>Ribeiro</surname> <given-names>F. C.</given-names></name> <name><surname>Ledo</surname> <given-names>J. H.</given-names></name> <name><surname>Diniz</surname> <given-names>L. P.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Interaction of A&#x03B2; oligomers with Neurexin 2&#x03B1; and Neuroligin 1 mediates synapse damage and memory loss in mice</article-title>. <source>J. Biol. Chem.</source> <volume>292</volume>, <fpage>7327</fpage>&#x2013;<lpage>7337</lpage>. doi: <pub-id pub-id-type="doi">10.1074/jbc.M116.761189</pub-id></citation></ref>
<ref id="ref5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Browaeys</surname> <given-names>R.</given-names></name> <name><surname>Saelens</surname> <given-names>W.</given-names></name> <name><surname>Saeys</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <article-title>NicheNet: modeling intercellular communication by linking ligands to target genes</article-title>. <source>Nat. Methods</source> <volume>17</volume>, <fpage>159</fpage>&#x2013;<lpage>162</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41592-019-0667-5</pub-id>, PMID: <pub-id pub-id-type="pmid">31819264</pub-id></citation></ref>
<ref id="ref6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Butturini</surname> <given-names>E.</given-names></name> <name><surname>Cozzolino</surname> <given-names>F.</given-names></name> <name><surname>Boriero</surname> <given-names>D.</given-names></name> <name><surname>Carcereri de Prati</surname> <given-names>A.</given-names></name> <name><surname>Monti</surname> <given-names>M.</given-names></name> <name><surname>Rossin</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>S-glutathionylation exerts opposing roles in the regulation of STAT1 and STAT3 signaling in reactive microglia</article-title>. <source>Free Radic. Biol. Med.</source> <volume>117</volume>, <fpage>191</fpage>&#x2013;<lpage>201</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.freeradbiomed.2018.02.005</pub-id>, PMID: <pub-id pub-id-type="pmid">29427792</pub-id></citation></ref>
<ref id="ref7"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Cai</surname> <given-names>D.</given-names></name> <name><surname>Lam</surname> <given-names>W.</given-names></name></person-group> <source>Graph transformer for graph-to-sequence learning. In AAAI</source> (<year>2020</year>).</citation></ref>
<ref id="ref8"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Wu</surname> <given-names>X.</given-names></name> <name><surname>Rastogi</surname> <given-names>V.</given-names></name> <name><surname>Liang</surname> <given-names>Y.</given-names></name> <name><surname>Jha</surname> <given-names>S.</given-names></name></person-group> <article-title>Robust attribution regularization</article-title>. In: <conf-name>NeurIPS</conf-name> (<year>2019</year>).</citation></ref>
<ref id="ref9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chew</surname> <given-names>H.</given-names></name> <name><surname>Solomon</surname> <given-names>V. A.</given-names></name> <name><surname>Fonteh</surname> <given-names>A. N.</given-names></name></person-group> (<year>2020</year>). <article-title>Involvement of lipids in Alzheimer&#x2019;s disease pathology and potential therapies</article-title>. <source>Front. Physiol.</source> <volume>11</volume>:<fpage>598</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fphys.2020.00598</pub-id>, PMID: <pub-id pub-id-type="pmid">32581851</pub-id></citation></ref>
<ref id="ref10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Choi</surname> <given-names>H.</given-names></name> <name><surname>Sheng</surname> <given-names>J.</given-names></name> <name><surname>Gao</surname> <given-names>D.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name> <name><surname>Durrans</surname> <given-names>A.</given-names></name> <name><surname>Ryu</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Transcriptome analysis of individual stromal cell populations identifies stroma-tumor crosstalk in mouse lung Cancer model</article-title>. <source>Cell Rep.</source> <volume>10</volume>, <fpage>1187</fpage>&#x2013;<lpage>1201</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.celrep.2015.01.040</pub-id>, PMID: <pub-id pub-id-type="pmid">25704820</pub-id></citation></ref>
<ref id="ref11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Efremova</surname> <given-names>M.</given-names></name> <name><surname>Vento-Tormo</surname> <given-names>M.</given-names></name> <name><surname>Teichmann</surname> <given-names>S. A.</given-names></name> <name><surname>Vento-Tormo</surname> <given-names>R.</given-names></name></person-group> (<year>2020</year>). <article-title>CellPhoneDB: inferring cell&#x2013;cell communication from combined expression of multi-subunit ligand&#x2013;receptor complexes</article-title>. <source>Nat. Protoc.</source> <volume>15</volume>, <fpage>1484</fpage>&#x2013;<lpage>1506</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41596-020-0292-x</pub-id>, PMID: <pub-id pub-id-type="pmid">32103204</pub-id></citation></ref>
<ref id="ref12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fan</surname> <given-names>W.</given-names></name> <name><surname>Jin</surname> <given-names>W.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Xu</surname> <given-names>H.</given-names></name> <name><surname>Tang</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Jointly attacking graph neural network and its explanations</article-title>. <source>ArXiv abs/2108.03388</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2108.03388</pub-id></citation></ref>
<ref id="ref13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Feng</surname> <given-names>J.</given-names></name> <name><surname>Zeng</surname> <given-names>A.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Payne</surname> <given-names>P.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name></person-group> (<year>2020</year>). <article-title>Signaling interaction link prediction using deep graph neural networks integrating protein-protein interactions and omics data</article-title>. <source>bioRxiv 2020.12.23.424230</source>. doi: <pub-id pub-id-type="doi">10.1101/2020.12.23.424230</pub-id></citation></ref>
<ref id="ref14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Funderburk</surname> <given-names>S. F.</given-names></name> <name><surname>Marcellino</surname> <given-names>B. K.</given-names></name> <name><surname>Yue</surname> <given-names>Z.</given-names></name></person-group> (<year>2010</year>). <article-title>Cell &#x2018;self-eating&#x2019; (autophagy) mechanism in Alzheimer&#x2019;s disease</article-title>. <source>Mt. Sinai J. Med.</source> <volume>77</volume>, <fpage>59</fpage>&#x2013;<lpage>68</lpage>. doi: <pub-id pub-id-type="doi">10.1002/msj.20161</pub-id>, PMID: <pub-id pub-id-type="pmid">20101724</pub-id></citation></ref>
<ref id="ref15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gui</surname> <given-names>R.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Wu</surname> <given-names>Y.</given-names></name> <name><surname>Jiao</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Effects and potential mechanisms of IGF1/IGF1R in the liver fibrosis: a review</article-title>. <source>Int. J. Biol. Macromol.</source> <volume>251</volume>:<fpage>126263</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ijbiomac.2023.126263</pub-id>, PMID: <pub-id pub-id-type="pmid">37567540</pub-id></citation></ref>
<ref id="ref16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>Y.</given-names></name> <name><surname>Miao</surname> <given-names>X.</given-names></name> <name><surname>Sun</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Zhou</surname> <given-names>A.</given-names></name> <name><surname>Zhu</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Zinc finger transcription factor Egf1 promotes non-alcoholic fatty liver disease</article-title>. <source>JHEP Rep.</source> <volume>5</volume>:<fpage>100724</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jhepr.2023.100724</pub-id>, PMID: <pub-id pub-id-type="pmid">37234276</pub-id></citation></ref>
<ref id="ref17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Halliday</surname> <given-names>G.</given-names></name> <name><surname>Robinson</surname> <given-names>S. R.</given-names></name> <name><surname>Shepherd</surname> <given-names>C.</given-names></name> <name><surname>Kril</surname> <given-names>J.</given-names></name></person-group> (<year>2000</year>). <article-title>Alzheimer&#x2019;s disease and inflammation: a review of cellular and therapeutic mechanisms</article-title>. <source>Clin. Exp. Pharmacol. Physiol.</source> <volume>27</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1046/j.1440-1681.2000.03200.x</pub-id></citation></ref>
<ref id="ref18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hamilton</surname> <given-names>W. L.</given-names></name> <name><surname>Ying</surname> <given-names>R.</given-names></name> <name><surname>Leskovec</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>Inductive representation learning on large graphs</article-title>. <source>arXiv:1706.02216v4</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1706.02216</pub-id></citation></ref>
<ref id="ref19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hao</surname> <given-names>Y.</given-names></name> <name><surname>Hao</surname> <given-names>S.</given-names></name> <name><surname>Andersen-Nissen</surname> <given-names>E.</given-names></name> <name><surname>Mauck</surname> <given-names>W. M.</given-names> <suffix>III</suffix></name> <name><surname>Zheng</surname> <given-names>S.</given-names></name> <name><surname>Butler</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Integrated analysis of multimodal single-cell data</article-title>. <source>Cell</source> <volume>184</volume>, <fpage>3573</fpage>&#x2013;<lpage>3587.e29</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2021.04.048</pub-id>, PMID: <pub-id pub-id-type="pmid">34062119</pub-id></citation></ref>
<ref id="ref20"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>Z.</given-names></name> <name><surname>Dong</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>K.</given-names></name> <name><surname>Sun</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). &#x201C;<article-title>Heterogeneous graph transformer</article-title>&#x201D; in <source>Proceedings of the web conference 2020 2704&#x2013;2710 (Association for Computing Machinery)</source> (<publisher-loc>New York, NY</publisher-loc>).</citation></ref>
<ref id="ref21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>X.</given-names></name> <name><surname>Herrero</surname> <given-names>C.</given-names></name> <name><surname>Li</surname> <given-names>W. P.</given-names></name> <name><surname>Antoniv</surname> <given-names>T. T.</given-names></name> <name><surname>Falck-Pedersen</surname> <given-names>E.</given-names></name> <name><surname>Koch</surname> <given-names>A. E.</given-names></name> <etal/></person-group>. (<year>2002</year>). <article-title>Sensitization of IFN-&#x03B3; Jak-STAT signaling during macrophage activation</article-title>. <source>Nat. Immunol.</source> <volume>3</volume>, <fpage>859</fpage>&#x2013;<lpage>866</lpage>. doi: <pub-id pub-id-type="doi">10.1038/ni828</pub-id>, PMID: <pub-id pub-id-type="pmid">12172544</pub-id></citation></ref>
<ref id="ref22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>Y.</given-names></name> <name><surname>Peng</surname> <given-names>T.</given-names></name> <name><surname>Gao</surname> <given-names>L.</given-names></name> <name><surname>Tan</surname> <given-names>K.</given-names></name></person-group> (<year>2021</year>). <article-title>CytoTalk: De novo construction of signal transduction networks using single-cell transcriptomic data</article-title>. <source>Sci. Adv.</source> <volume>7</volume>:<fpage>eabf1356</fpage>. doi: <pub-id pub-id-type="doi">10.1126/sciadv.abf1356</pub-id>, PMID: <pub-id pub-id-type="pmid">33853780</pub-id></citation></ref>
<ref id="ref23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hwang</surname> <given-names>B.</given-names></name> <name><surname>Lee</surname> <given-names>J. H.</given-names></name> <name><surname>Bang</surname> <given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>Single-cell RNA sequencing technologies and bioinformatics pipelines</article-title>. <source>Exp. Mol. Med.</source> <volume>50</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s12276-018-0071-8</pub-id>, PMID: <pub-id pub-id-type="pmid">30089861</pub-id></citation></ref>
<ref id="ref24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kolodziejczyk</surname> <given-names>A. A.</given-names></name> <name><surname>Kim</surname> <given-names>J. K.</given-names></name> <name><surname>Svensson</surname> <given-names>V.</given-names></name> <name><surname>Marioni</surname> <given-names>J. C.</given-names></name> <name><surname>Teichmann</surname> <given-names>S. A.</given-names></name></person-group> (<year>2015</year>). <article-title>The technology and biology of single-cell RNA sequencing</article-title>. <source>Mol. Cell</source> <volume>58</volume>, <fpage>610</fpage>&#x2013;<lpage>620</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.molcel.2015.04.005</pub-id></citation></ref>
<ref id="ref25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lambert</surname> <given-names>S. A.</given-names></name> <name><surname>Jolma</surname> <given-names>A.</given-names></name> <name><surname>Campitelli</surname> <given-names>L. F.</given-names></name> <name><surname>das</surname> <given-names>P. K.</given-names></name> <name><surname>Yin</surname> <given-names>Y.</given-names></name> <name><surname>Albu</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>The human transcription factors</article-title>. <source>Cell</source> <volume>172</volume>, <fpage>650</fpage>&#x2013;<lpage>665</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2018.01.029</pub-id></citation></ref>
<ref id="ref26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>G.</given-names></name> <name><surname>Jiang</surname> <given-names>Q.</given-names></name> <name><surname>Xu</surname> <given-names>K.</given-names></name></person-group> (<year>2019</year>). <article-title>CREB family: a significant role in liver fibrosis</article-title>. <source>Biochimie</source> <volume>163</volume>, <fpage>94</fpage>&#x2013;<lpage>100</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.biochi.2019.05.014</pub-id>, PMID: <pub-id pub-id-type="pmid">31112743</pub-id></citation></ref>
<ref id="ref27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mathys</surname> <given-names>H.</given-names></name> <name><surname>Davila-Velderrain</surname> <given-names>J.</given-names></name> <name><surname>Peng</surname> <given-names>Z.</given-names></name> <name><surname>Gao</surname> <given-names>F.</given-names></name> <name><surname>Mohammadi</surname> <given-names>S.</given-names></name> <name><surname>Young</surname> <given-names>J. Z.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Single-cell transcriptomic analysis of Alzheimer&#x2019;s disease</article-title>. <source>Nature</source> <volume>570</volume>, <fpage>332</fpage>&#x2013;<lpage>337</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41586-019-1195-2</pub-id>, PMID: <pub-id pub-id-type="pmid">31042697</pub-id></citation></ref>
<ref id="ref28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Morikawa</surname> <given-names>R.</given-names></name> <name><surname>Nakamoto</surname> <given-names>N.</given-names></name> <name><surname>Amiya</surname> <given-names>T.</given-names></name> <name><surname>Chu</surname> <given-names>P. S.</given-names></name> <name><surname>Koda</surname> <given-names>Y.</given-names></name> <name><surname>Teratani</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Role of CC chemokine receptor 9 in the progression of murine and human non-alcoholic steatohepatitis</article-title>. <source>J. Hepatol.</source> <volume>74</volume>, <fpage>511</fpage>&#x2013;<lpage>521</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jhep.2020.09.033</pub-id>, PMID: <pub-id pub-id-type="pmid">33038434</pub-id></citation></ref>
<ref id="ref29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nasiri</surname> <given-names>E.</given-names></name> <name><surname>Sankowski</surname> <given-names>R.</given-names></name> <name><surname>Dietrich</surname> <given-names>H.</given-names></name> <name><surname>Oikonomidi</surname> <given-names>A.</given-names></name> <name><surname>Huerta</surname> <given-names>P. T.</given-names></name> <name><surname>Popp</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Key role of MIF-related neuroinflammation in neurodegeneration and cognitive impairment in Alzheimer&#x2019;s disease</article-title>. <source>Mol. Med.</source> <volume>26</volume>:<fpage>34</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s10020-020-00163-5</pub-id>, PMID: <pub-id pub-id-type="pmid">32303185</pub-id></citation></ref>
<ref id="ref30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rogers</surname> <given-names>J.</given-names></name> <name><surname>Webster</surname> <given-names>S.</given-names></name> <name><surname>Lue</surname> <given-names>L. F.</given-names></name> <name><surname>Brachova</surname> <given-names>L.</given-names></name> <name><surname>Harold Civin</surname> <given-names>W.</given-names></name> <name><surname>Emmerling</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>1996</year>). <article-title>Inflammation and Alzheimer&#x2019;s disease pathogenesis</article-title>. <source>Neurobiol. Aging</source> <volume>17</volume>, <fpage>681</fpage>&#x2013;<lpage>686</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0197-4580(96)00115-7</pub-id></citation></ref>
<ref id="ref31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rong</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Self-supervised graph transformer on large-scale molecular data</article-title>. <source>arXiv:2007.02835v2</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2007.02835</pub-id></citation></ref>
<ref id="ref32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Saint-Antoine</surname> <given-names>M. M.</given-names></name> <name><surname>Singh</surname> <given-names>A.</given-names></name></person-group> (<year>2019</year>). <article-title>Network inference in systems biology: Recent developments, challenges, and applications</article-title>. <source>Curr. Opin. Biotechnol.</source> <volume>63</volume>, <fpage>89</fpage>&#x2013;<lpage>98</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.copbio.2019.12.002</pub-id></citation></ref>
<ref id="ref33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tanay</surname> <given-names>A.</given-names></name> <name><surname>Regev</surname> <given-names>A.</given-names></name></person-group> (<year>2017</year>). <article-title>Scaling single-cell genomics from phenomenology to mechanism</article-title>. <source>Nature</source> <volume>541</volume>, <fpage>331</fpage>&#x2013;<lpage>338</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature21350</pub-id>, PMID: <pub-id pub-id-type="pmid">28102262</pub-id></citation></ref>
<ref id="ref34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tavassoly</surname> <given-names>O.</given-names></name> <name><surname>Sato</surname> <given-names>T.</given-names></name> <name><surname>Tavassoly</surname> <given-names>I.</given-names></name></person-group> (<year>2020</year>). <article-title>Inhibition of brain EGFR activation: a novel target in neurodegenerative diseases and brain injuries</article-title>. <source>Mol. Pharmacol.</source> <volume>98</volume>, <fpage>13</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.1124/mol.120.119909</pub-id>, PMID: <pub-id pub-id-type="pmid">32350120</pub-id></citation></ref>
<ref id="ref35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tindale</surname> <given-names>L. C.</given-names></name> <name><surname>Leach</surname> <given-names>S. R.</given-names></name> <name><surname>Spinelli</surname> <given-names>J. J.</given-names></name> <name><surname>Brooks-Wilson</surname> <given-names>A. R.</given-names></name></person-group> (<year>2017</year>). <article-title>Lipid and Alzheimer&#x2019;s disease genes associated with healthy aging and longevity in healthy oldest-old</article-title>. <source>Oncotarget</source> <volume>8</volume>, <fpage>20612</fpage>&#x2013;<lpage>20621</lpage>. doi: <pub-id pub-id-type="doi">10.18632/oncotarget.15296</pub-id>, PMID: <pub-id pub-id-type="pmid">28206976</pub-id></citation></ref>
<ref id="ref36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tyzack</surname> <given-names>G. E.</given-names></name> <name><surname>Hall</surname> <given-names>C. E.</given-names></name> <name><surname>Sibley</surname> <given-names>C. R.</given-names></name> <name><surname>Cymes</surname> <given-names>T.</given-names></name> <name><surname>Forostyak</surname> <given-names>S.</given-names></name> <name><surname>Carlino</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>A neuroprotective astrocyte state is induced by neuronal signal EphB1 but fails in ALS models</article-title>. <source>Nat. Commun.</source> <volume>8</volume>:<fpage>1164</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-017-01283-z</pub-id>, PMID: <pub-id pub-id-type="pmid">29079839</pub-id></citation></ref>
<ref id="ref37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vaswani</surname> <given-names>A.</given-names></name> <name><surname>Shazeer</surname> <given-names>N.</given-names></name> <name><surname>Parmar</surname> <given-names>N.</given-names></name> <name><surname>Uszkoreit</surname> <given-names>J.</given-names></name> <name><surname>Jones</surname> <given-names>L.</given-names></name> <name><surname>Gomez</surname> <given-names>A. N.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Attention is all you need</article-title>. <source>arXiv:1706.03762v7</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1706.03762</pub-id></citation></ref>
<ref id="ref38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Veli&#x010D;kovi&#x0107;</surname> <given-names>P.</given-names></name> <name><surname>Cucurull</surname> <given-names>G.</given-names></name> <name><surname>Casanova</surname> <given-names>A.</given-names></name> <name><surname>Romero</surname> <given-names>A.</given-names></name> <name><surname>Li&#x00F2;</surname> <given-names>P.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Graph attention networks</article-title>. <source>arXiv:1710.10903v3</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1710.10903</pub-id></citation></ref>
<ref id="ref39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Chiang</surname> <given-names>H. C.</given-names></name> <name><surname>Wu</surname> <given-names>W.</given-names></name> <name><surname>Liang</surname> <given-names>B.</given-names></name> <name><surname>Xie</surname> <given-names>Z.</given-names></name> <name><surname>Yao</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Epidermal growth factor receptor is a preferred target for treating amyloid-&#x03B2;&#x2013;induced memory loss</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>109</volume>, <fpage>16743</fpage>&#x2013;<lpage>16748</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1208011109</pub-id>, PMID: <pub-id pub-id-type="pmid">23019586</pub-id></citation></ref>
<ref id="ref40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Xiong</surname> <given-names>M.</given-names></name> <name><surname>Gratuze</surname> <given-names>M.</given-names></name> <name><surname>Bao</surname> <given-names>X.</given-names></name> <name><surname>Shi</surname> <given-names>Y.</given-names></name> <name><surname>Andhey</surname> <given-names>P. S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Selective removal of astrocytic APOE4 strongly protects against tau-mediated neurodegeneration and decreases synaptic phagocytosis by microglia</article-title>. <source>Neuron</source> <volume>109</volume>, <fpage>1657</fpage>&#x2013;<lpage>1674.e7</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2021.03.024</pub-id>, PMID: <pub-id pub-id-type="pmid">33831349</pub-id></citation></ref>
<ref id="ref41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wehr</surname> <given-names>A.</given-names></name> <name><surname>Baeck</surname> <given-names>C.</given-names></name> <name><surname>Heymann</surname> <given-names>F.</given-names></name> <name><surname>Niemietz</surname> <given-names>P. M.</given-names></name> <name><surname>Hammerich</surname> <given-names>L.</given-names></name> <name><surname>Martin</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Chemokine receptor CXCR6-dependent hepatic NK T cell accumulation promotes inflammation and liver fibrosis</article-title>. <source>J. Immunol.</source> <volume>190</volume>, <fpage>5226</fpage>&#x2013;<lpage>5236</lpage>. doi: <pub-id pub-id-type="doi">10.4049/jimmunol.1202909</pub-id>, PMID: <pub-id pub-id-type="pmid">23596313</pub-id></citation></ref>
<ref id="ref9001"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>K.</given-names></name> <name><surname>Li</surname> <given-names>C.</given-names></name> <name><surname>Tian</surname> <given-names>Y.</given-names></name> <name><surname>Sonobe</surname> <given-names>T.</given-names></name> <name><surname>Kawarabayashi</surname> <given-names>K.</given-names></name> <name><surname>Jegelka</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>Representation Learning on Graphs with Jumping Knowledge Networks</article-title>. <source>arXiv:1806.03536v2</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1806.03536</pub-id></citation></ref>
<ref id="ref42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>P.</given-names></name> <name><surname>Das</surname> <given-names>M.</given-names></name> <name><surname>Reilly</surname> <given-names>J.</given-names></name> <name><surname>Davis</surname> <given-names>R.</given-names></name></person-group> (<year>2011</year>). <article-title>JNK regulates FoxO-dependent autophagy in neurons</article-title>. <source>Genes Dev.</source> <volume>25</volume>, <fpage>310</fpage>&#x2013;<lpage>322</lpage>. doi: <pub-id pub-id-type="doi">10.1101/gad.1984311</pub-id>, PMID: <pub-id pub-id-type="pmid">21325132</pub-id></citation></ref>
<ref id="ref43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>K.</given-names></name> <name><surname>Hu</surname> <given-names>W.</given-names></name> <name><surname>Leskovec</surname> <given-names>J.</given-names></name> <name><surname>Jegelka</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>How powerful are graph neural networks?</article-title> <source>arXiv:1810.00826v3</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1810.00826</pub-id></citation></ref>
<ref id="ref44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>K.</given-names></name> <name><surname>Li</surname> <given-names>C.</given-names></name> <name><surname>Tian</surname> <given-names>Y.</given-names></name> <name><surname>Sonobe</surname> <given-names>T.</given-names></name> <name><surname>Kawarabayashi</surname> <given-names>K.</given-names></name> <name><surname>Jegelka</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Representation learning on graphs with jumping knowledge networks</article-title>. <source>arXiv:1806.03536v2</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1806.03536</pub-id></citation></ref>
<ref id="ref45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Xiao</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>C.</given-names></name> <name><surname>Lian</surname> <given-names>D.</given-names></name> <name><surname>Agrawal</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>GraphFormers: GNN-nested transformers for representation learning on textual graph</article-title>. <source>arXiv:2105.02605v3</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2105.02605</pub-id></citation></ref>
<ref id="ref46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ying</surname> <given-names>C.</given-names></name> <name><surname>Cai</surname> <given-names>T.</given-names></name> <name><surname>Luo</surname> <given-names>S.</given-names></name> <name><surname>Zheng</surname> <given-names>S.</given-names></name> <name><surname>Ke</surname> <given-names>G.</given-names></name> <name><surname>He</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Do transformers really perform bad for graph representation?</article-title> <source>ArXiv abs/2106.05234</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2106.05234</pub-id></citation></ref>
<ref id="ref47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>W. H.</given-names></name> <name><surname>Cuervo</surname> <given-names>A. M.</given-names></name> <name><surname>Kumar</surname> <given-names>A.</given-names></name> <name><surname>Peterhoff</surname> <given-names>C. M.</given-names></name> <name><surname>Schmidt</surname> <given-names>S. D.</given-names></name> <name><surname>Lee</surname> <given-names>J. H.</given-names></name> <etal/></person-group>. (<year>2005</year>). <article-title>Macroautophagy&#x2014;a novel &#x03B2;-amyloid peptide-generating pathway activated in Alzheimer&#x2019;s disease</article-title>. <source>J. Cell Biol.</source> <volume>171</volume>, <fpage>87</fpage>&#x2013;<lpage>98</lpage>. doi: <pub-id pub-id-type="doi">10.1083/jcb.200505082</pub-id>, PMID: <pub-id pub-id-type="pmid">16203860</pub-id></citation></ref>
<ref id="ref48"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>M.</given-names></name> <name><surname>Cui</surname> <given-names>Z.</given-names></name> <name><surname>Neumann</surname> <given-names>M.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name></person-group> <article-title>An end-to-end deep learning architecture for graph classification</article-title>. In <source>32nd AAAI Conference on artificial intelligence, AAAI 2018</source>, (<year>2018</year>).</citation></ref>
<ref id="ref49"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>P.</given-names></name> <name><surname>Qin</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Liu</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Jiang</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Impacts of PICALM and CLU variants associated with Alzheimer&#x2019;s disease on the functional connectivity of the hippocampus in healthy young adults</article-title>. <source>Brain Struct. Funct.</source> <volume>220</volume>, <fpage>1463</fpage>&#x2013;<lpage>1475</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00429-014-0738-4</pub-id>, PMID: <pub-id pub-id-type="pmid">24578178</pub-id></citation></ref>
<ref id="ref50"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Xu</surname> <given-names>X.</given-names></name> <name><surname>Tian</surname> <given-names>W.</given-names></name> <name><surname>Jiang</surname> <given-names>R.</given-names></name> <name><surname>Lu</surname> <given-names>Y.</given-names></name> <name><surname>Sun</surname> <given-names>Q.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>ARRB1 inhibits non-alcoholic steatohepatitis progression by promoting GDF15 maturation</article-title>. <source>J. Hepatol.</source> <volume>72</volume>, <fpage>976</fpage>&#x2013;<lpage>989</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jhep.2019.12.004</pub-id>, PMID: <pub-id pub-id-type="pmid">31857195</pub-id></citation></ref>
<ref id="ref51"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Sun</surname> <given-names>L.</given-names></name> <name><surname>Xia</surname> <given-names>C. G.-B.</given-names></name></person-group> (<year>2020</year>). <article-title>Only attention is needed for learning graph representations</article-title>. <source>ArXiv 2001.05140v2</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2001.05140</pub-id></citation></ref>
<ref id="ref52"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>Y.</given-names></name> <name><surname>Hou</surname> <given-names>H.</given-names></name> <name><surname>Rezai-Zadeh</surname> <given-names>K.</given-names></name> <name><surname>Giunta</surname> <given-names>B.</given-names></name> <name><surname>Ruscin</surname> <given-names>A.</given-names></name> <name><surname>Gemma</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>CD45 deficiency drives amyloid-&#x03B2; peptide oligomers and neuronal loss in Alzheimer's disease mice</article-title>. <source>J. Neurosci.</source> <volume>31</volume>, <fpage>1355</fpage>&#x2013;<lpage>1365</lpage>. doi: <pub-id pub-id-type="doi">10.1523/JNEUROSCI.3268-10.2011</pub-id>, PMID: <pub-id pub-id-type="pmid">21273420</pub-id></citation></ref>
</ref-list>
</back>
</article>