<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="abstract" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">767602</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2021.767602</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>MultiCapsNet: A General Framework for Data Integration and Interpretable Classification</article-title>
<alt-title alt-title-type="left-running-head">Wang et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">MultiCapsNet for Data-Integration and Classification</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Lifei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1465897/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Miao</surname>
<given-names>Xuexia</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1530321/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nie</surname>
<given-names>Rui</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1509617/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Zhang</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhang</surname>
<given-names>Jiang</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Cai</surname>
<given-names>Jun</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1460899/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Shulan (Hangzhou) Hospital Affiliated to Zhejiang Shuren University Shulan International Medical College</institution>, <addr-line>Hangzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>China National Center for Bioinformation</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Key Laboratory of Genomic and Precision Medicine, Beijing Institute of Genomics, Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>University of Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>School of Systems Science, Beijing Normal University</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/13066/overview">Jin Chen</ext-link>, University of Kentucky, United&#x20;States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1472335/overview">Md Selim</ext-link>, University of Kentucky, United&#x20;States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1472400/overview">Lucas Jing Liu</ext-link>, University of Kentucky, United&#x20;States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Jiang Zhang, <email>zhangjiang@bnu.edu.cn</email>; Jun Cai, <email>juncai@big.ac.cn</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Computational Genomics, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>18</day>
<month>01</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>767602</elocation-id>
<history>
<date date-type="received">
<day>31</day>
<month>08</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>25</day>
<month>10</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Wang, Miao, Nie, Zhang, Zhang and Cai.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Wang, Miao, Nie, Zhang, Zhang and Cai</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<abstract>
<p>Recent progress in experimental biology has generated a large amount of data with different formats and lengths. Deep learning is an ideal tool to deal with complex datasets, but its inherent &#x201c;black box&#x201d; nature calls for more interpretability. At the same time, traditional interpretable machine learning methods, such as linear regression or random forest, can only deal with numerical features instead of the modular features often encountered in the biological field. Here, we present MultiCapsNet (<ext-link ext-link-type="uri" xlink:href="https://github.com/wanglf19/MultiCapsNet">https://github.com/wanglf19/MultiCapsNet</ext-link>), a new deep learning model built on CapsNet and scCapsNet, which offers merits such as easy data integration and high model interpretability. To demonstrate the ability of this model as an interpretable classifier to deal with modular inputs, we test MultiCapsNet on three datasets with different data types and application scenarios. Firstly, on the labeled variant call dataset, MultiCapsNet shows classification performance similar to that of a neural network model, and provides importance scores for data sources directly without the extra importance determination step required by the neural network model. The importance scores generated by these two models are highly correlated. Secondly, on a single cell RNA sequencing (scRNA-seq) dataset, MultiCapsNet integrates information about protein-protein interaction (PPI) and protein-DNA interaction (PDI). The classification accuracy of MultiCapsNet is comparable to that of the neural network and random forest models. Meanwhile, MultiCapsNet reveals how each transcription factor (TF) or PPI cluster node contributes to the classification of cell type. Thirdly, we make a comparison between MultiCapsNet and SCENIC. The results show that several cell type-relevant TFs are identified by both methods, further proving the validity and interpretability of MultiCapsNet.</p>
</abstract>
<kwd-group>
<kwd>capsule network</kwd>
<kwd>classification</kwd>
<kwd>data integration</kwd>
<kwd>interpretability</kwd>
<kwd>modular feature</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Recent advances in experimental biology have generated huge amounts of data. More detectable biological targets and various new measuring methods produce data at an unprecedented speed. For example, Microwell-Seq, a single cell RNA sequencing technology, has been used to analyze the transcriptome of more than 400,000 mouse single cells, covering all major mouse organs (<xref ref-type="bibr" rid="B21">Han et&#x20;al., 2018</xref>); single cell bisulfite sequencing (scBS-seq) has been designed to measure genome-wide DNA methylation at the single-cell level (<xref ref-type="bibr" rid="B36">Smallwood et&#x20;al., 2014</xref>); and mass-spectrometry based technologies could explore the composition, structure, function, and control of the proteome (<xref ref-type="bibr" rid="B1">Aebersold and Mann, 2016</xref>). In addition, large and complex data sets are produced by large-scale projects, such as &#x201c;The Cancer Genome Atlas&#x201d; (TCGA) (<xref ref-type="bibr" rid="B38">Tomczak et&#x20;al., 2015</xref>), and &#x201c;Encyclopedia of DNA Elements&#x201d; (ENCODE) (<xref ref-type="bibr" rid="B11">Consortium, 2004</xref>), which were established through community cooperation. There is an urgent need for next generation methods to deal with large, heterogeneous and complex data sets (<xref ref-type="bibr" rid="B8">Camacho et&#x20;al., 2018</xref>).</p>
<p>As a promising data processing method, deep learning methods have been employed in biological data processing (<xref ref-type="bibr" rid="B4">Alipanahi et&#x20;al., 2015</xref>; <xref ref-type="bibr" rid="B8">Camacho et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B42">Zhou et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B16">Eraslan et&#x20;al., 2019</xref>). Various deep learning models could deal with various input data with different types and formats. For example, RNA sequence data as real-value vectors could be processed by simple feed forward neural network, which is a component of more complex models, such as auto-encoder (AE) (<xref ref-type="bibr" rid="B26">Lin et&#x20;al., 2017</xref>; <xref ref-type="bibr" rid="B10">Chen et&#x20;al., 2018</xref>), variational auto-encoder (VAE) (<xref ref-type="bibr" rid="B15">Ding et&#x20;al., 2018</xref>), and Generative adversarial network (GAN) (<xref ref-type="bibr" rid="B27">Lopez et&#x20;al., 2018</xref>). Sequence information, which is coded by ATCG, could be converted into real valued vectors by deep learning model using convolution neural networks (CNN) after model training (<xref ref-type="bibr" rid="B4">Alipanahi et&#x20;al., 2015</xref>). Furthermore, deep learning models could integrate data with different types and formats. For example, DeepCpG utilizes both DNA sequence patterns and neighboring methylation states for predicting single-cell methylation state and modeling the sources of DNA methylation variability (<xref ref-type="bibr" rid="B6">Angermueller et&#x20;al., 2017</xref>). However, the deep learning methods usually run as a &#x201c;black box&#x201d;, which is hard to interpret (<xref ref-type="bibr" rid="B5">Almas Jabeen and Raza, 2017</xref>). Great efforts have been made to improve the interpretability of deep learning models. 
The prior biological information, such as regulation between transcription factors (TF) and target genes or priori defined gene sets that retain the crucial biological features, could specify connections between neurons in the neural networks in order to associate the internal node (neuron) in the neural networks with TFs and thereby ease the difficulty of interpreting models (<xref ref-type="bibr" rid="B26">Lin et&#x20;al., 2017</xref>; <xref ref-type="bibr" rid="B10">Chen et&#x20;al., 2018</xref>). New probabilistic generative models with more interpretability, such as variational inference neural networks, are applied to scRNA-seq data for dimension reduction (<xref ref-type="bibr" rid="B15">Ding et&#x20;al., 2018</xref>).</p>
<p>Traditional interpretable machine learning methods, such as linear regression (logistic regression) or decision tree (random forest), could only deal with numerical or categorical features (<xref ref-type="bibr" rid="B29">Molnar, 2019</xref>) (<xref ref-type="fig" rid="F1">Figure&#x20;1A</xref>). However, in the field of biology, especially in the field of network biology, the data is highly modular in nature. For example, in drug discovery, many independent features with multiple labels (e.g., response to drug, and disease state) across a multitude of data types (e.g., expression profiles, chemical structures) are needed; and in synthetic biology, the input may include sequence data, composition data and functional data (<xref ref-type="bibr" rid="B8">Camacho et&#x20;al., 2018</xref>). An interpretable machine learning method adapted to modular input is therefore needed.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>MultiCapsNet is an interpretable classifier and data integrator with modular inputs<bold>. (A)</bold> The traditional interpretable machine learning methods. The input of this model is numerical. After training, the model will reveal the inputs (features) importance for classification (or regression). The size and color depth of the circle indicate the importance of the features, while the larger and darker circle indicates that the feature is more important. <bold>(B)</bold> The MultiCapsNet is an interpretable classifier with modular input. The inputs (features) with different format (real-valued vector, one-hot encoding vector, or sequence data) and different lengths are first converted into real-valued vectors with equal length through trainable networks. Then, classification was based on those real-valued vectors of the same length. After training, the model will reveal the inputs (features) importance to classification. The size and color depth of the circle indicate the importance of the feature, while larger and darker circle indicates that the feature is more important. <bold>(C)</bold> The MultiCapsNet could integrate data from different sources. <bold>(D)</bold> The MultiCapsNet could integrate prior knowledge, such as gene regulatory information. Left: Gene regulatory networks, transcription factor and its targets are marked with same color. Right: expression of genes that are regulated by the same transcription factor could be regarded as a data source.</p>
</caption>
<graphic xlink:href="fgene-12-767602-g001.tif"/>
</fig>
<p>The capsule network (CapsNet) is a newly developed deep learning model for digital recognition tasks (<xref ref-type="bibr" rid="B34">Sabour et&#x20;al., 2017</xref>). In the realm of biology, the CapsNet model has been directly applied for protein structure classification and prediction (<xref ref-type="bibr" rid="B13">Dan Rosa de Jesus et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B17">Fang et&#x20;al., 2018</xref>) and is ripe for application in network biology and disease biology with data from multi-omics dataset (<xref ref-type="bibr" rid="B8">Camacho et&#x20;al., 2018</xref>). In our previous work, we proposed a modified CapsNet model, called single cell capsule network (scCapsNet), which is suitable for single-cell RNA sequencing (scRNA-seq) data (<xref ref-type="bibr" rid="B40">Wang et&#x20;al., 2020</xref>). The scCapsNet is a highly interpretable cell type classifier, with the capability of revealing cell type associated genes by model internal parameters.</p>
<p>Here, we introduce MultiCapsNet, a deep learning classifier and data integrator built on CapsNet and scCapsNet. As a general framework, the MultiCapsNet model should be able to deal with modular data from multiple sources with different formats and lengths, and give the importance scores of each data source for prediction after training (<xref ref-type="fig" rid="F1">Figures 1B&#x2013;D</xref>). In order to demonstrate its wide biological application, the MultiCapsNet model was tested on three data sets. In the first example, we applied the MultiCapsNet model to the labeled variant call data set, which was originally used to test the models for automating somatic variant refinement (<xref ref-type="bibr" rid="B3">Ainscough et&#x20;al., 2018</xref>). According to data source and data attributes, the 71 features listed in the data set were divided into eight groups. Then the features in one group were viewed as a whole to train the MultiCapsNet model. After training, the performance of our MultiCapsNet matches well with the previous feed forward neural network model and random forest model. As an advantage, our MultiCapsNet model directly provides the importance score for each data source, while the previous feed forward neural network model needs an extra importance determination step through shuffling individual features to do so. Although our MultiCapsNet model is substantially different from the previous feed forward neural network model and the source importance measuring methods are also different, the importance scores generated by those two models are highly correlated. In the second example, we demonstrate how to integrate prior knowledge and scRNA-seq data through the MultiCapsNet model. 
The protein-protein interaction (PPI) information stored in BIOGRID (<xref ref-type="bibr" rid="B37">Stark et&#x20;al., 2006</xref>) and HPRD (<xref ref-type="bibr" rid="B23">Keshava Prasad et&#x20;al., 2009</xref>), and protein-DNA interactions (PDI) from DREM 2.0 (<xref ref-type="bibr" rid="B35">Schulz et&#x20;al., 2012</xref>), are used as prior knowledge to specify network connections, as in previous work (<xref ref-type="bibr" rid="B26">Lin et&#x20;al., 2017</xref>). In this example, the structures of the first part of the MultiCapsNet model, i.e.,&#x20;the connections between inputs and primary capsules, are determined by the PPI and PDI information. As a result of these specified structures, each primary capsule is labeled either as TF or PPI subnetwork (PPI), and inputs of each primary capsule could be regarded as a data source. We use data from the mouse scRNA-seq dataset (<xref ref-type="bibr" rid="B21">Han et&#x20;al., 2018</xref>) to train this MultiCapsNet model and the classification accuracy of MultiCapsNet is comparable to that of the neural network and random forest models. After training, the MultiCapsNet model reveals how each primary capsule, which is labeled either as TF or PPI subnetwork (PPI), contributes to cell type classification. The top contributors of a particular cell type are usually related to that cell type. In the third example, we make a comparison between our MultiCapsNet and the established single-cell regulatory network inference method: SCENIC (Single-cell regulatory network inference and clustering) (<xref ref-type="bibr" rid="B2">Aibar et&#x20;al., 2017</xref>). The results show that many cell type-relevant TFs are identified by both methods, which further proves the validity and interpretability of MultiCapsNet.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>Methods</title>
<sec id="s2-1">
<title>Datasets and Data Preprocessing</title>
<p>The labeled variant call dataset from previous work was used to test the MultiCapsNet model (<xref ref-type="bibr" rid="B3">Ainscough et&#x20;al., 2018</xref>). This dataset contains more than 41,000 samples, which are assembled to train models for automating somatic variant refinement. Each sample in the dataset is manually labeled as one of four tags by the reviewer: &#x201c;somatic&#x201d;, &#x201c;ambiguous&#x201d;, &#x201c;germline&#x201d;, and &#x201c;fail&#x201d;, which represent the confidence of a variant call by the upstream somatic variant caller. As in previous work, we merged the variant calls labeled as &#x201c;germline&#x201d; and &#x201c;fail&#x201d; into a class named &#x201c;fail&#x201d;. The numbers of instances in each class are around 10,000, 13,000, and 18,000 for &#x201c;ambiguous&#x201d;, &#x201c;fail&#x201d;, and &#x201c;somatic&#x201d;, respectively. There are 71 features that are associated with each sample, including cancer types, reviewers, tumor read depth, normal read depth, and so on. According to the data sources and data attributes, we divided these 71 features into eight groups (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>). Group 1 contains nine cancer types, and is encoded as a one-hot encoding vector. We call group 1 &#x201c;Disease&#x201d; because it indicates the disease to which each variant call belongs. Group 2 contains four reviewers, and is encoded as a one-hot encoding vector. We call group 2 &#x201c;Reviewer&#x201d;. Group 3 contains information of &#x201c;normal VAF&#x201d;, &#x201c;normal depth&#x201d;, &#x201c;normal other bases count&#x201d;, and is called &#x201c;Normal_pro&#x201d;, short for &#x201c;Normal properties&#x201d;. Group 4 contains 13 features that describe reference reads in normal, including base quality, mapping quality, numbers of mismatches, numbers of minus and plus strand, and so on. We call group 4 &#x201c;Normal_ref&#x201d;. 
Group 5 contains 13 features extracted from variant reads in normal, also including base quality, mapping quality, numbers of mismatches, numbers of minus and plus strand, and so on. We call group 5 &#x201c;Normal_var&#x201d;. The last three groups contain features drawn from tumor rather than from normal as in the previous three groups. Analogously to groups 3, 4, and 5, we label groups 6, 7, and 8 as &#x201c;Tumor_pro&#x201d;, &#x201c;Tumor_ref&#x201d;, and &#x201c;Tumor_var&#x201d;, respectively.</p>
<p>The mouse scRNA-seq data were measured by Microwell-Seq (<xref ref-type="bibr" rid="B21">Han et&#x20;al., 2018</xref>). We downloaded scRNA-seq data and the annotation information through the link provided by the authors (<ext-link ext-link-type="uri" xlink:href="https://figshare.com/s/865e694ad06d5857db4b">https://figshare.com/s/865e694ad06d5857db4b</ext-link>). Then we use the annotation information to select part of the data from the whole dataset. The cell types we chose include &#x201c;Cartilage cell&#x201d;, &#x201c;Secretory alveoli cell&#x201d;, &#x201c;Epithelial cell&#x201d;, &#x201c;Kupffer cell&#x201d;, &#x201c;Muscle cell&#x201d;, &#x201c;Dendritic cell&#x201d;, and &#x201c;Spermatocyte&#x201d;, and the numbers of instances in each cell type are 527, 1,195, 1,219, 356, 626, 717, and 353, respectively. Moreover, we only use the genes contained in prior knowledge (<xref ref-type="bibr" rid="B26">Lin et&#x20;al., 2017</xref>) to fit the model structure, and set the default value to zero when the downloaded scRNA-seq data does not contain a given gene (<xref ref-type="bibr" rid="B21">Han et&#x20;al., 2018</xref>).</p>
<p>A SCENIC example dataset was used to compare the performances of MultiCapsNet and SCENIC (<ext-link ext-link-type="uri" xlink:href="https://scenic.aertslab.org/examples/">https://scenic.aertslab.org/examples/</ext-link>). The dataset (sceMouseBrain.RData) contains seven cell types of mouse cortex and hippocampus (<xref ref-type="bibr" rid="B41">Zeisel et&#x20;al., 2015</xref>) [&#x201c;astrocytes_ependymal&#x201d; (224), &#x201c;endothelial_mural&#x201d; (235), &#x201c;interneurons&#x201d; (290), &#x201c;microglia&#x201d; (98), &#x201c;oligodendrocytes&#x201d; (820), &#x201c;pyramidal_CA1&#x201d; (939), and &#x201c;pyramidal_SS&#x201d; (399)].</p>
</sec>
<sec id="s2-2">
<title>The Architecture and Parameters of the MultiCapsNet Model</title>
<p>In the architecture of our MultiCapsNet model, there are <italic>l</italic> neural networks corresponding to <italic>l</italic> input modular data.<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi mathvariant="italic">tanh</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>1,2</mml:mn>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
<italic>x</italic>
<sub>
<italic>i</italic>
</sub> represents the <italic>i</italic>-th input modular data. <inline-formula id="inf1">
<mml:math id="m2">
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represents weight matrices of neural networks with dimension (<italic>n, r</italic>
<sub>
<italic>i</italic>
</sub>), where the <italic>r</italic>
<sub>
<italic>i</italic>
</sub> is the length of the input modular data <italic>x</italic>
<sub>
<italic>i</italic>
</sub>. The output <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> of each neural network <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>1,2</mml:mn>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a vector with length <italic>n</italic>, viewed as &#x201c;primary capsule&#x201d; in the model. The input standardization part converts the modular data with different types and lengths into real-valued vectors with equal length <italic>n</italic> (<italic>n</italic>&#x20;&#x3d; 8 by default).</p>
<p>The standardized information is subsequently delivered through primary capsule to the capsule in the final layer by &#x201c;dynamic routing&#x201d; (<xref ref-type="sec" rid="s10">Supplementary Figure S1</xref>). Each capsule in the final layer, named &#x201c;type capsule&#x201d;, corresponds to each cell type. They are denoted as vectors <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>1,2</mml:mn>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf6">
<mml:math id="m7">
<mml:mi>k</mml:mi>
</mml:math>
</inline-formula> is the number of cell types and <italic>m</italic> is the length of vectors. The capsule network module is implemented in Keras (<ext-link ext-link-type="uri" xlink:href="https://github.com/bojone/Capsule">https://github.com/bojone/Capsule</ext-link>).</p>
<p>Prior to the &#x201c;dynamic routing&#x201d; process, the primary capsules are multiplied by weight matrices <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to produce &#x201c;prediction vectors&#x201d; <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.<disp-formula id="e2">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>Then the iterative dynamic routing begins. Firstly, the &#x201c;coupling coefficients&#x201d; <inline-formula id="inf9">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are calculated by the formula:<disp-formula id="e3">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>exp</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>exp</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf10">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is an intermediate parameter with initial value of zero, representing the inner product of the prediction vector and type capsule vector.</p>
<p>In order to compute the <inline-formula id="inf11">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for the next round of iteration, the weighted sum <inline-formula id="inf12">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> over all <inline-formula id="inf13">
<mml:math id="m16">
<mml:mi>l</mml:mi>
</mml:math>
</inline-formula> prediction vectors <inline-formula id="inf14">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is calculated by formula:<disp-formula id="e4">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>normalize</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:munder>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>Secondly, <inline-formula id="inf15">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is computed by the dot product of <inline-formula id="inf16">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf17">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as the last step of one round dynamic routing process.<disp-formula id="e5">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>After several rounds of dynamic routing, the type capsule <inline-formula id="inf18">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is calculated by a non-linear &#x201c;squashing&#x201d; function:<disp-formula id="e6">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x2016;</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>0.5</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x2016;</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x2016;</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>The following pseudocode illustrates the implementation of MultiCapsNet.<list list-type="simple">
<list-item>
<p>1) <inline-formula id="inf19">
<mml:math id="m25">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>y</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>2) for <inline-formula id="inf20">
<mml:math id="m26">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>y</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>and type capsule <italic>j</italic>: <inline-formula id="inf21">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>3) procedure ROUTING<inline-formula id="inf22">
<mml:math id="m28">
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>4) for all primary capsule <italic>i</italic> and type capsule <italic>j</italic>: <inline-formula id="inf23">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>5) For <inline-formula id="inf24">
<mml:math id="m30">
<mml:mi>r</mml:mi>
</mml:math>
</inline-formula> iterations&#x20;do</p>
</list-item>
<list-item>
<p>6) for all primary capsule <italic>i</italic>: <inline-formula id="inf25">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> &#x2190; softmax <inline-formula id="inf26">
<mml:math id="m32">
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>7) for all type capsule <italic>j</italic>: <inline-formula id="inf27">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2190;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:munder>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>8) for all primary capsule <italic>i</italic> and type capsule <italic>j</italic>: <inline-formula id="inf28">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2190;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>return <inline-formula id="inf29">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2190;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> squash&#x20;(<inline-formula id="inf30">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>)</p>
</list-item>
</list>
</p>
<p>The implementation of MultiCapsNet can be found in <ext-link ext-link-type="uri" xlink:href="https://github.com/wanglf19/MultiCapsNet">https://github.com/wanglf19/MultiCapsNet</ext-link>.</p>
</sec>
<sec id="s2-3">
<title>MultiCapsNet Model in Somatic Variant Refinement Task</title>
<p>In the somatic variant refinement task, the eight groups mentioned above in the section of &#x201c;Datasets and data preprocessing&#x201d; correspond to eight input sources. Therefore, there are eight neural networks corresponding to eight groups of input modular data (<italic>l &#x3d; 8</italic>).<disp-formula id="e7">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi mathvariant="italic">tanh</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mn>8</mml:mn>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>After the input standardization part, the input data <italic>x</italic>
<sub>
<italic>i</italic>
</sub> is converted into a primary capsule <italic>u</italic>
<sub>
<italic>i</italic>
</sub> having the same length. Next, the standardized information stored in the primary capsules would be delivered to the final layer capsules by &#x201c;dynamic routing&#x201d;. The capsules in the final layer, which correspond to labels of variant calls, are called &#x201c;label capsules&#x201d;. In CapsNet, the non-linear &#x201c;squashing&#x201d; function ensures that short vectors get shrunk to almost zero length and long vectors get shrunk to a length slightly below 1 (<xref ref-type="bibr" rid="B34">Sabour et al., 2017</xref>). The length of the label capsule represents the probability that a variant call is either &#x201c;ambiguous&#x201d;, &#x201c;fail&#x201d;, or &#x201c;somatic&#x201d; (<xref ref-type="fig" rid="F2">Figure&#x20;2</xref>). To evaluate the performance of the model, we use the &#x201c;area under the curve&#x201d; (AUC) score, as in previous work (<xref ref-type="bibr" rid="B3">Ainscough et&#x20;al., 2018</xref>), and prediction accuracy.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Architecture of MultiCapsNet with two layers. The first layer consists of eight parallel neural networks, corresponding to eight data sources (groups). The outputs of neural networks are the primary capsules (real valued vectors) with equal length. The second layer is the Keras implementation of CapsNet for classification. The length of each label capsule represents the probability that the input data belongs to the corresponding classification category.</p>
</caption>
<graphic xlink:href="fgene-12-767602-g002.tif"/>
</fig>
</sec>
<sec id="s2-4">
<title>MultiCapsNet Model That Integrates Prior Knowledge</title>
<p>The MultiCapsNet could integrate prior knowledge into its structure. In brief, PPI information stored in BIOGRID (<xref ref-type="bibr" rid="B37">Stark et&#x20;al., 2006</xref>) and HPRD (<xref ref-type="bibr" rid="B23">Keshava Prasad et&#x20;al., 2009</xref>), and PDI coming from DREM 2.0 (<xref ref-type="bibr" rid="B35">Schulz et&#x20;al., 2012</xref>), are used as prior knowledge for specifying network connections between the inputs and the primary capsules (<xref ref-type="fig" rid="F3">Figure&#x20;3A</xref>), just as previous work used this prior knowledge to specify network connections between the inputs and neurons (<xref ref-type="bibr" rid="B26">Lin et&#x20;al., 2017</xref>). For example, the prior knowledge indicates that Gene<sub>1</sub>,&#x2026;, Gene<sub>n</sub> are regulated by a TF (colored with green), so there are connections between Gene<sub>1</sub>,&#x2026;, Gene<sub>n</sub> and the primary capsule representing the corresponding TF (green connection); the prior knowledge indicates that Gene<sub>2</sub>,&#x2026;, Gene<sub>n</sub> are regulated by a TF (colored with blue), so there are connections between Gene<sub>2</sub>,&#x2026;, Gene<sub>n</sub> and the primary capsule representing the corresponding TF (blue connection); and the prior knowledge indicates that Gene<sub>2</sub>, Gene<sub>3</sub>,&#x2026;, are in a subnetwork of the PPI network (colored with red), so there are connections between Gene<sub>2</sub>, Gene<sub>3</sub>,&#x2026;, and the primary capsule representing the corresponding PPI subnetwork (red connection). Although there is only one input source, namely scRNA-seq data, the input source can be decomposed into several parts by integrating prior knowledge, and each part is connected to a primary capsule. Therefore, we also took a single input source integrated with prior knowledge as an input from multiple sources, each of which is associated with a TF or a PPI subnetwork (<xref ref-type="fig" rid="F3">Figure&#x20;3B</xref>).</p>
<p>In total there are 696 input modular data, with 348&#x20;TF-targets relationships extracted from PDI information and 348 PPI subnetworks. Therefore, there are 696 neural networks corresponding to 696 modular data (<italic>l &#x3d; 696</italic>).<disp-formula id="e8">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="italic">tanh</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mn>696</mml:mn>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>After the input standardization part, the input data <italic>x</italic>
<sub>
<italic>i</italic>
</sub> is converted into primary capsule <italic>u</italic>
<sub>
<italic>i</italic>
</sub> with the same length. Next, the standardized information stored in the primary capsules would be delivered to the final layer capsules by &#x201c;dynamic routing&#x201d;. The capsules in the final layer, which correspond to cell types, are called &#x201c;type capsules&#x201d;.</p>
</sec>
<sec id="s2-5">
<title>MultiCapsNet Model Compared with SCENIC</title>
<p>SCENIC is a workflow for simultaneous reconstruction of gene regulatory networks and identification of cell states using scRNA-seq data (<xref ref-type="bibr" rid="B2">Aibar et&#x20;al., 2017</xref>). The workflow consists of three modules (R/Bioconductor packages): GENIE3 (GRNboost), RcisTarget, and AUCell. The first two modules are responsible for finding potential TF-target relationships based on co-expression and subsequently selecting the highly confident TF-target regulation according to TF-motif enrichment analysis. After that, several potential TF-target relationships across all cell types, called regulons, are identified in the dataset. AUCell then scores the activity of these regulons in each single cell. Finally, an unsupervised method is used to cluster cells and identify cell types and states based on the scores of the regulons, which are used as features for each cell. In our model, we utilized the regulon information identified by the first two modules of SCENIC as the prior knowledge to specify the connections between input and primary capsules (<xref ref-type="sec" rid="s10">Supplementary Figure S2A</xref>). The dataset, intermediate results and the output of SCENIC for a mouse brain example were downloaded from the website (<ext-link ext-link-type="uri" xlink:href="https://scenic.aertslab.org/examples/">https://scenic.aertslab.org/examples/</ext-link>). The regulon information was extracted from the intermediate result file (regulons_asGeneSet.Rds).</p>
<p>In total there are 253 regulons, which specify TFs and their target genes. Therefore, there are 253 neural networks corresponding to 253 modular data (<italic>l &#x3d;</italic> 253).<disp-formula id="e9">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="italic">tanh</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mn>253</mml:mn>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>After the input standardization part, the input data <italic>x</italic>
<sub>
<italic>i</italic>
</sub> is converted into primary capsule <italic>u</italic>
<sub>
<italic>i</italic>
</sub> with the same length. Next, the standardized information stored in the primary capsules would be delivered to the final layer capsules by &#x201c;dynamic routing&#x201d;. The capsules in the final layer, which correspond to cell types, are called &#x201c;type capsules&#x201d;.</p>
</sec>
<sec id="s2-6">
<title>Average Coupling Coefficients and Data Source Importance</title>
<p>In scCapsNet, we showed that the average coupling coefficients represent the contribution of the primary capsules to the final layer type capsules for each cell type (<xref ref-type="bibr" rid="B40">Wang et&#x20;al., 2020</xref>). Similarly, in the MultiCapsNet model, the type (label) capsule <inline-formula id="inf31">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> derives from a weighted sum of prediction vectors <inline-formula id="inf32">
<mml:math id="m41">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>u</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The weights are the coupling coefficients <inline-formula id="inf33">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the magnitude of these coefficients could roughly be regarded as the contribution of the primary capsules <inline-formula id="inf34">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to the type capsules <inline-formula id="inf35">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Each sample (single cell, somatic variant) generates its own coupling coefficients. The average coupling coefficients for samples with the same type (label) are calculated by the formula:<disp-formula id="e10">
<mml:math id="m45">
<mml:mrow>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mn>1</mml:mn>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>Therefore, each classification category (cell type/variant call label) corresponds to an average coupling coefficients matrix (<inline-formula id="inf36">
<mml:math id="m46">
<mml:mrow>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>), called type average coupling coefficients, with rows representing type capsules and columns representing primary capsules. The type average coupling coefficients matrix could be plotted as a heatmap for visualization of data. For each classification category (cell type/variant call label), the corresponding type average coupling coefficients matrix contains an effective type capsule row, which is the row whose&#x20;type is consistent with this classification category. For example, the effective type capsule row in the type average coupling coefficients matrix (<inline-formula id="inf37">
<mml:math id="m47">
<mml:mrow>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>) is&#x20;the row <inline-formula id="inf38">
<mml:math id="m48">
<mml:mrow>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. In this row, the magnitude of each element could be regarded as the importance score of the corresponding primary capsule to this classification category. The effective type&#x20;capsule rows of all classification categories (<inline-formula id="inf39">
<mml:math id="m49">
<mml:mrow>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mn>3</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mn>3</mml:mn>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>&#x2026;) could be organized into a new matrix, visually represented as an overall heatmap.</p>
</sec>
<sec id="s2-7">
<title>Algorithm Implementation for Comparisons</title>
<p>A neural network with sigmoid activation function was implemented in Keras. The random forest and nearest-neighbour are implemented with the Python package &#x201c;scikit-learn&#x201d;. The comparison transformers model was originally used for IMDB movie review sentiment classification dataset. This&#x20;transformer model contains the embedding layer for embedding the words into vectors and the Multi-head attention layer (<ext-link ext-link-type="uri" xlink:href="https://github.com/bojone/attention/">https://github.com/bojone/attention/</ext-link>). We replace the embedding layer with our data standardization layer, and retain the Multi-head attention layer for classification.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>MultiCapsNet Achieves High Classification Accuracy and High Interpretability for Modular Data From Variant Call Dataset</title>
<p>The variant call dataset (please refer to &#x201c;Datasets and data preprocessing&#x201d; in the METHODS section for details) was randomly divided into a training set and a validation set with a ratio of 9:1. Our MultiCapsNet model performs well in the classification of variant calls (<xref ref-type="fig" rid="F4">Figure&#x20;4</xref>). The results show that the AUC of the MultiCapsNet model is 0.94, 0.99, and 0.97, respectively, in the classification categories of &#x201c;ambiguous&#x201d;, &#x201c;fail&#x201d;, and &#x201c;somatic&#x201d; (<xref ref-type="fig" rid="F4">Figure&#x20;4A</xref>). These AUC scores are similar to those obtained by the Multi-head Attention model (0.93, 0.98, 0.96), feed forward neural network (0.93, 0.99, 0.96), and random forest (0.96, 0.99, 0.98) (<xref ref-type="bibr" rid="B3">Ainscough et&#x20;al., 2018</xref>). Meanwhile, the average prediction accuracy of the MultiCapsNet model is around 0.873, similar to that obtained by the Multi-head Attention model (0.866), and slightly lower than those of the feed forward neural network (0.887) and random forest (0.895).</p>
<p>In MultiCapsNet, the coupling coefficient <italic>c</italic>
<sub>
<italic>ij</italic>
</sub> is viewed as an importance score, i.e., the weight that measures the contribution of each primary capsule to the final layer type capsule. Each input would generate its own coupling coefficient, and the type average coupling coefficient is the average over all the inputs with the same classification category. After MultiCapsNet model training, the type average coupling coefficients for each variant label (&#x201c;ambiguous&#x201d;, &#x201c;fail&#x201d;, and &#x201c;somatic&#x201d;) were calculated and visualized as heatmaps (<xref ref-type="sec" rid="s10">Supplementary Figure S3A</xref>) (please refer to the &#x201c;METHODS&#x201d; section for the detailed calculation formula of type average coupling coefficients). In each type average coupling coefficient matrix, the most important row, named the &#x201c;effective type capsule row&#x201d;, is the row whose type is consistent with this classification category. The overall heatmap is assembled from the &#x201c;effective type capsule rows&#x201d;, which describe the importance scores of all the data sources for distinct category classification (<xref ref-type="sec" rid="s10">Supplementary Figure S3B</xref>). Therefore, the overall heatmap also shows the contribution of each data source to the recognition of each variant label (&#x201c;ambiguous&#x201d;, &#x201c;fail&#x201d;, and &#x201c;somatic&#x201d;). For example, the data source of &#x201c;Disease&#x201d; contributes to the classification of the &#x201c;somatic&#x201d; category and the &#x201c;Reviewer&#x201d; source contributes to the classification of the &#x201c;ambiguous&#x201d; category. The &#x201c;Tumor_var&#x201d; source is the most important one for the classification of all three categories (<xref ref-type="sec" rid="s10">Supplementary Figure S3B</xref>). 
Over 9 repetitions, the values of each row in the 9 overall heatmaps are averaged to determine the importance scores of each data source for the classification of all the categories in the MultiCapsNet model (<xref ref-type="fig" rid="F4">Figure&#x20;4B</xref>). In the feed forward neural network model, the feature importance is measured by the average change of AUC after randomly shuffling individual features. Based on the step of feature grouping, we added the feature importance scores belonging to the same group together, and took these values as the importance of data sources (each group) in the feed forward neural network model (<xref ref-type="fig" rid="F4">Figure&#x20;4B</xref>). Then, we calculated the correlation between the data source importance scores obtained by our MultiCapsNet model and those provided by the feed forward neural network model. Although our MultiCapsNet model is substantially different from the previous feed forward neural network, and the source importance measuring methods are also different, there is a very high correlation between them (Pearson Correlation Coefficient &#x3d; 0.876) (<xref ref-type="fig" rid="F4">Figure&#x20;4B</xref>). Both models indicate that the tumor variant group is very important for variant call classification.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Architecture of MultiCapsNet integrated with prior knowledge. <bold>(A)</bold> The model has two layers. The first layer consists of 696 parallel neural networks corresponding to 696 primary capsules labeled with either transcription factor (348) or protein-protein interaction cluster node (348). The inputs of each primary capsule include genes regulated by a transcription factor or in a protein-protein interactions sub-network. The second layer is the Keras implementation of CapsNet for classification. The length of each final layer type capsule represents the probability of input data belonging to the corresponding classification category. <bold>(B)</bold> Alternative representation of MultiCapsNet integrated with prior knowledge. Genes that are regulated by a transcription factor or in a protein-protein interactions sub-network are grouped together as a data source for MultiCapsNet. <xref ref-type="fig" rid="F3">Figures 3A,B</xref> are equivalent, with different representations.</p>
</caption>
<graphic xlink:href="fgene-12-767602-g003.tif"/>
</fig>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>The comparison between MultiCapsNet and feed forward neural network shows the high performance and interpretability of MultiCapsNet. <bold>(A)</bold> The AUC scores demonstrate that the MultiCapsNet model achieves very high classification performances in all three classification categories. <bold>(B)</bold> The normalized group (data source) importance scores generated by MultiCapsNet and feed forward neural network are highly correlated.</p>
</caption>
<graphic xlink:href="fgene-12-767602-g004.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>MultiCapsNet Integrated with Prior Knowledge Could Function as Classifier and Identify Cell Type Relevant TF</title>
<p>The dataset is a portion of mouse scRNA-seq data measured by Microwell-Seq, which consists of nearly 5,000 cells of seven types and 9,437 genes (Please refer to METHODS section for the details). The MultiCapsNet model that integrates prior knowledge (<xref ref-type="fig" rid="F4">Figure&#x20;4</xref>) was trained and tested by using this dataset. The average validation accuracy and F1 score are around 97%, comparable with those generated by the feed forward neural network, Multi-head Attention model and random forest (<xref ref-type="sec" rid="s10">Supplementary Figures S4A, B</xref>). After training, the average coupling coefficients, which represent the contribution of the primary capsules (TF/PPI) to the type capsules (Cell type), were calculated and visualized as heatmaps for each cell type (<xref ref-type="fig" rid="F5">Figure&#x20;5A</xref>). In each heatmap, we can clearly observe that the high value elements in the average coupling coefficients (dark lines in the plot) are exclusively located in the effective type capsule row. Then, the corresponding type capsule row was selected from each heatmap in <xref ref-type="fig" rid="F5">Figure&#x20;5A</xref>, and organized into an overall heatmap (<xref ref-type="fig" rid="F5">Figure&#x20;5B</xref>).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>MultiCapsNet integrated with prior knowledge could identify cell type associated transcription factors. <bold>(A)</bold> Heatmaps of the matrices of average coupling coefficients for each cell type. In each heatmap, there are 696 columns for 696 primary capsules (TF/PPI) and seven rows for seven type capsules (cell types), and each element in the average coupling coefficients is represented by a thin line. The brightness of these thin lines (elements in the average coupling coefficients) indicates the contribution of the primary capsules (TF/PPI) to the specific cell type recognition. The dark lines (high score elements in average coupling coefficients) exclusively reside in the corresponding effective type capsule row in each heatmap. <bold>(B)</bold> Overall heatmap of the combined matrix of average coupling coefficients. The combined matrix contains the effective type capsule rows in <xref ref-type="fig" rid="F5">Figure&#x20;5A</xref>, i.e., the rows whose recognition type is in accordance with the type of the input single cells. <bold>(C)</bold> The table lists several top ranked contributors for specific cell type recognition, given by the MultiCapsNet model, whose association with the corresponding cell types has been reported before.</p>
</caption>
<graphic xlink:href="fgene-12-767602-g005.tif"/>
</fig>
<p>We repeated the training process 9&#x20;times and generated nine overall heatmaps accordingly. Based on the average value of the nine overall heatmaps, the top 10 relevant TFs/PPI subnetworks were generated (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). Most of the top 10 relevant TFs/PPI subnetworks were specific to one cell type, and many of them have been reported to be associated with corresponding cell types previously (<xref ref-type="fig" rid="F5">Figure&#x20;5C</xref>). For example, <italic>Gata1</italic> and <italic>Gata2</italic> are top contributors for dendritic cell recognition. Previous work indicated that <italic>Gata1</italic> regulates dendritic cell development and survival (<xref ref-type="bibr" rid="B20">Guti&#xe9;rrez et&#x20;al., 2007</xref>), and that <italic>Gata2</italic> regulates dendritic cell differentiation (<xref ref-type="bibr" rid="B32">Onodera et&#x20;al., 2016</xref>). <italic>Srf</italic> and <italic>Yy1</italic> are ranked as the top contributors for muscle cell recognition by the model. Consistently, <italic>Srf</italic> is required for skeletal muscle growth and maturation (<xref ref-type="bibr" rid="B25">Li et&#x20;al., 2005</xref>), and <italic>Yy1</italic> is associated with increased smooth muscle specific gene expression (<xref ref-type="bibr" rid="B18">Favot et&#x20;al., 2005</xref>). <italic>FoxA2</italic> and <italic>FoxA3</italic> are ranked as top contributors for Cartilage cell recognition, and <italic>FoxA2</italic> and <italic>FoxA3</italic> are necessary to promote high-level expression of several hypertrophic chondrocyte markers (<xref ref-type="bibr" rid="B22">Ionescu et&#x20;al., 2012</xref>). The model reports <italic>Rxrg</italic>, <italic>Rara</italic>, <italic>Rarg</italic>, <italic>Rarb</italic>, <italic>Rxra</italic>, and <italic>Rxrb</italic> as top contributors for Kupffer cell recognition. 
Previous research reported that RA receptor (RAR) and retinoid X receptor (RXR) are expressed by Kupffer cells (<xref ref-type="bibr" rid="B39">Ulven et&#x20;al., 1998</xref>; <xref ref-type="bibr" rid="B31">Ohata et&#x20;al., 2000</xref>). <italic>Pgr</italic> is ranked as a top contributor for secretory alveoli cell recognition, and the progesterone receptor (<italic>Pgr</italic>) knockout mouse demonstrated that Pg is required for alveolar morphogenesis (<xref ref-type="bibr" rid="B30">Oakes et&#x20;al., 2006</xref>). <italic>Topors</italic> is ranked as a top contributor for spermatocyte recognition. Previous work indicates that <italic>dtopors</italic>, the <italic>Drosophila</italic> homolog of the mammalian <italic>Topors</italic>, plays a structural role in spermatocyte lamina that is critical for multiple aspects of meiotic chromosome transmission (<xref ref-type="bibr" rid="B28">Matsui et&#x20;al., 2011</xref>).</p>
</sec>
<sec id="s3-3">
<title>The Comparison of MultiCapsNet Model with SCENIC Shows That Several Cell Type Relevant TFs Are Identified by Both Methods</title>
<p>To further demonstrate the effectiveness of our MultiCapsNet model in revealing cell type related TFs from scRNA-seq data, we compared it with an established single-cell regulatory network inference method, SCENIC (Single-cell regulatory network inference and clustering) (<xref ref-type="sec" rid="s10">Supplementary Figure S2A</xref>). The scRNA-seq data from mouse cortex and hippocampus were used to evaluate these two methods (Please refer to METHODS section for the details).</p>
<p>After MultiCapsNet training, the average coupling coefficients in the overall heatmap would indicate the most relevant TFs associated with each cell type (<xref ref-type="sec" rid="s10">Supplementary Figure S5</xref>). We repeated the experiment 9 times; the average validation accuracy was 97%, and the average F1 score was around 95%, which were comparable to the results generated by feed forward neural network, Multi-head Attention model and random forest (<xref ref-type="sec" rid="s10">Supplementary Figures S4C, D</xref>). According to the average value of nine overall heatmaps, the top 30 relevant TFs could be generated (<xref ref-type="fig" rid="F6">Figure&#x20;6A</xref> left; <xref ref-type="sec" rid="s10">Supplementary Table S3</xref> top). The original regulon may contain TFs that label the 253 regulons. In order to eliminate the influence caused by the expression of those labeling TFs, the potential TF-target relationships that exclude the labeling TF in the set of target genes are also made (<xref ref-type="sec" rid="s10">Supplementary Figure S2B</xref>). We also repeated the training process of MultiCapsNet integrated with those new potential TF-target relationships. After training, the top 30 relevant TFs could also be generated according to the average value of the nine overall heatmaps (<xref ref-type="fig" rid="F6">Figure&#x20;6A</xref> right; <xref ref-type="sec" rid="s10">Supplementary Table S3</xref> bottom). The results show that the inclusion or exclusion of the labeling TF has little influence on prediction accuracy and interpretability of the model. The overlap rates of the top 30 most relevant TFs of each cell type (around top 10% of total TFs) between the model including the labeling TF and that excluding the labeling TF are very high, around 90% for every cell type (<xref ref-type="fig" rid="F6">Figure&#x20;6B</xref>).</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>The comparison of MultiCapsNet and SCENIC shows the robustness and interpretability of MultiCapsNet. <bold>(A)</bold> Averaged overall heatmaps for the mouse cortex and hippocampus dataset show that MultiCapsNet performs consistently whether including <bold>(left)</bold> or excluding <bold>(right)</bold> the labelling TF from regulon. <bold>(B)</bold> The top ranked contributors for specific cell type classification identified from the dataset either including <bold>(left)</bold> or excluding <bold>(right)</bold> the labelling TF are highly overlapped. <bold>(C)</bold> The table lists several top ranked contributors for specific cell type recognition, given by both the MultiCapsNet model and SCENIC.</p>
</caption>
<graphic xlink:href="fgene-12-767602-g006.tif"/>
</fig>
<p>Many high score TFs predicted by MultiCapsNet are consistent with those reported by SCENIC (<xref ref-type="bibr" rid="B2">Aibar et&#x20;al., 2017</xref>). For example, in both methods, <italic>Rorb</italic> is identified as a relevant TF for astrocytes; <italic>Ets1</italic>, <italic>Elk3</italic>, and <italic>Gata2</italic> are identified as relevant TFs for endothelial-mural cells; <italic>Zmat4</italic>, <italic>Dlx5</italic>, <italic>Dlx2</italic>, and <italic>Dlx1</italic> are identified as relevant TFs for interneurons; <italic>Maf</italic>, <italic>Rel</italic>, <italic>Cebpa</italic>, <italic>Cebpb</italic>, <italic>Nfatc2</italic>, <italic>Prdm1</italic>, <italic>Nfkb1</italic>, and <italic>Stat6</italic> are identified as relevant TFs for microglia; <italic>Sox10</italic> and <italic>Sox8</italic> are identified as relevant TFs for oligodendrocytes. Besides the TFs listed above, MultiCapsNet&#x20;also detected several high confidence cell type relevant TFs that are also found by SCENIC. For example, <italic>Rfx3</italic> shows a high association with both pyramidal SS and CA1 cells. Previous studies reported that a downstream target of <italic>Rfx3</italic> displayed cytosolic expression in pyramidal neurons (<xref ref-type="bibr" rid="B33">Remnest&#xe5;l, 2015</xref>) and that <italic>Rfx3</italic> is expressed in cortical pyramidal neurons (<xref ref-type="bibr" rid="B7">Benadiba et&#x20;al., 2012</xref>). <italic>Neurod2</italic> is also identified as a relevant TF for both pyramidal SS and CA1 cells. Previous studies reported that <italic>Neurod2</italic> coordinates synaptic innervation and cell intrinsic properties to control excitability of cortical pyramidal neurons (<xref ref-type="bibr" rid="B9">Chen et&#x20;al., 2016</xref>). 
<italic>Cux1</italic> has been identified as a relevant TF for pyramidal SS cells, and <italic>Cux1</italic> has been reported as a restricted molecular marker for the upper layer (II-IV) pyramidal neurons in murine cerebral cortex (<xref ref-type="bibr" rid="B24">Li et&#x20;al., 2010</xref>). <italic>Smarca4</italic> has been identified as a relevant TF for pyramidal CA1 cells, and <italic>Brg1/Smarca4</italic> deficiency leads to mouse pyramidal neuron degeneration (<xref ref-type="bibr" rid="B14">Deng et&#x20;al., 2015</xref>). <italic>Ezh2</italic> has been suggested as a relevant TF for oligodendrocytes, and the expression of <italic>Ezh2</italic> in OPCs (oligodendrocyte precursor cells), even up to the stage of pre-myelinating immature oligodendrocytes, remains high (<xref ref-type="bibr" rid="B12">Copray et&#x20;al., 2009</xref>) (<xref ref-type="fig" rid="F6">Figure&#x20;6C</xref>). Furthermore, MultiCapsNet found that <italic>Rpp25</italic> is strongly associated with interneurons, an association that SCENIC did not detect, and <italic>Rpp25</italic> has been reported to be up-regulated in GABAergic interneurons (<xref ref-type="bibr" rid="B19">Fukumoto et&#x20;al., 2018</xref>).</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>In the first example, we demonstrated that the proposed MultiCapsNet model performed well in the variant call classification. Data sources with different data types, such as one-hot encoding vectors and real valued vectors, could be standardized into equal length vectors as primary capsules, which then pass the information into the final layer capsules by dynamic routing. The importance of the data sources was measured by the sum of the overall average coupling coefficients, obtained as a by-product of the model training. These importance scores are highly correlated with the importance scores calculated by the feed forward neural network, which are measured by the average change in the AUC after randomly shuffling individual features.</p>
<p>In the second example, we incorporated PPI and PDI information into the structure of the MultiCapsNet model. This specified structure decomposed the input scRNA-seq data into several parts, each part corresponding to a group of genes regulated by a TF or from a protein interaction sub-network. Therefore, each part of the decomposed input was regarded as a data source, and the associated primary capsule could be marked as the corresponding TF or PPI subnetwork. Although the number of the primary capsules was one order of magnitude more than that of the previous CapsNet model, the model performed well, and its classification accuracy was comparable with those generated by feed forward neural network and random forest. After training, the contributions of each primary capsule and its corresponding data source to the cell type recognition were revealed by the MultiCapsNet model as a by-product of classification. The TFs or the PPI subnetworks that labeled the top ranked contributors were often relevant to the cell type they contributed to. The comparison of our MultiCapsNet model with SCENIC showed several cell type relevant TFs identified by both methods, which further supports the validity and interpretability of the MultiCapsNet&#x20;model.</p>
<p>To sum up, our MultiCapsNet model could integrate multiple input sources and standardize the inputs, then use the standardized information for classification through a capsule network. In the variant call classification example, the data types are limited to one-hot encoding vectors or real valued vectors. With appropriate datasets, the MultiCapsNet could integrate and standardize more data types, such as sequence data, which can be integrated through a convolutional neural network. In addition, our MultiCapsNet model could also incorporate prior knowledge by adjusting the connections between layers according to the specification of the prior knowledge. In the example of scRNA-seq, we include only PPI and PDI information. In the future, the complex and hierarchical information of biological networks will be introduced into the MultiCapsNet model to better understand the intricacies of disease biology (<xref ref-type="bibr" rid="B8">Camacho et&#x20;al., 2018</xref>). Compared with other interpretable machine learning methods, MultiCapsNet could obtain similar classification accuracy under the condition of modular inputs, making it more suitable for the modular biological&#x20;data.</p>
<p>The MultiCapsNet model provides a framework for data integration, especially for multi-omics datasets, which have data from different sources and with different types and formats, or require prior knowledge. Once the data can be transformed into real valued vectors through trainable parameters, the data and transformation process can be integrated into the MultiCapsNet model as a building block. In this sense, the MultiCapsNet model possesses enormous flexibility and is applicable in many scenarios; moreover, it can measure the importance of data sources as part of the training step without any extra calculation&#x20;step.</p>
</sec>
</body>
<back>
<sec id="s5">
<title>Data Availability Statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://github.com/wanglf19/MultiCapsNet">https://github.com/wanglf19/MultiCapsNet</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author Contributions</title>
<p>JC, JZ, and LW envisioned the project. LW implemented the model and performed the analysis. LW and JC wrote the paper. XM, RN, ZZ, and JZ provided assistance in writing and analysis.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work was supported by grants from the Strategic Priority Research Program of the Chinese Academy of Sciences (XDB38030400 to C.J.); the National Key R&#x26;D Program of China (2018YFC0910402 to C.J.); the National Natural Science Foundation of China (32070795 to C.J.&#x20;and 61673070 to&#x20;JZ).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2021.767602/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2021.767602/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet2.PDF" id="SM1" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet1.XLSX" id="SM2" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aebersold</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Mann</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Mass-spectrometric Exploration of Proteome Structure and Function</article-title>. <source>Nature.</source> <volume>537</volume>, <fpage>347</fpage>&#x2013;<lpage>355</lpage>. <pub-id pub-id-type="doi">10.1038/nature19949</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aibar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gonz&#xe1;lez-Blas</surname>
<given-names>C. B.</given-names>
</name>
<name>
<surname>Moerman</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Huynh-Thu</surname>
<given-names>V. A.</given-names>
</name>
<name>
<surname>Imrichova</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hulselmans</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>SCENIC: Single-Cell Regulatory Network Inference and Clustering</article-title>. <source>Nat. Methods.</source> <volume>14</volume>, <fpage>1083</fpage>&#x2013;<lpage>1086</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.4463</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ainscough</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Barnell</surname>
<given-names>E. K.</given-names>
</name>
<name>
<surname>Ronning</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Campbell</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Wagner</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Fehniger</surname>
<given-names>T. A.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>A Deep Learning Approach to Automate Refinement of Somatic Variant Calling from Cancer Sequencing Data</article-title>. <source>Nat. Genet.</source> <volume>50</volume>, <fpage>1735</fpage>&#x2013;<lpage>1743</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-018-0257-y</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alipanahi</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Delong</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Weirauch</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>Frey</surname>
<given-names>B. J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Predicting the Sequence Specificities of DNA- and RNA-Binding Proteins by Deep Learning</article-title>. <source>Nat. Biotechnol.</source> <volume>33</volume>, <fpage>831</fpage>&#x2013;<lpage>838</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.3300</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jabeen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Raza</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). "<article-title>Machine Learning-Based State-of-the-Art Methods for the Classification of RNA-Seq Data</article-title>," in <source>Classification in BioApps. Lecture Notes in Computational Vision and Biomechanics</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Dey</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ashour</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Borra</surname>
<given-names>S.</given-names>
</name>
</person-group> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <volume>vol. 26</volume>. <pub-id pub-id-type="doi">10.1007/978-3-319-65981-7_6</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Angermueller</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Reik</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Stegle</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>DeepCpG: Accurate Prediction of Single-Cell DNA Methylation States Using Deep Learning</article-title>. <source>Genome Biol.</source> <volume>18</volume>, <fpage>67</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-017-1189-z</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benadiba</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Magnani</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Niquille</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Morl&#xe9;</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Valloton</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Nawabi</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>The Ciliogenic Transcription Factor RFX3 Regulates Early Midline Distribution of Guidepost Neurons Required for Corpus Callosum Development</article-title>. <source>Plos Genet.</source> <volume>8</volume>, <fpage>e1002606</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1002606</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Camacho</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Collins</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Powers</surname>
<given-names>R. K.</given-names>
</name>
<name>
<surname>Costello</surname>
<given-names>J.&#x20;C.</given-names>
</name>
<name>
<surname>Collins</surname>
<given-names>J.&#x20;J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Next-Generation Machine Learning for Biological Networks</article-title>. <source>Cell.</source> <volume>173</volume>, <fpage>1581</fpage>&#x2013;<lpage>1592</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2018.05.015</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Moran</surname>
<given-names>J.&#x20;T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ates</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Schrader</surname>
<given-names>L. A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>The Transcription Factor NeuroD2 Coordinates Synaptic Innervation and Cell Intrinsic Properties to Control Excitability of Cortical Pyramidal Neurons</article-title>. <source>J.&#x20;Physiol.</source> <volume>594</volume>, <fpage>3729</fpage>&#x2013;<lpage>3744</lpage>. <pub-id pub-id-type="doi">10.1113/jp271953</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>H.-I. H.</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>GSAE: an Autoencoder with Embedded Gene-Set Nodes for Genomics Functional Characterization</article-title>. <source>BMC Syst. Biol.</source> <volume>12</volume>, <fpage>142</fpage>. <pub-id pub-id-type="doi">10.1186/s12918-018-0642-2</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Consortium</surname>
<given-names>E. P.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>The ENCODE (ENCyclopedia of DNA Elements) Project</article-title>. <source>Science.</source> <volume>306</volume>, <fpage>636</fpage>&#x2013;<lpage>640</lpage>. <pub-id pub-id-type="doi">10.1126/science.1105136</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Copray</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huynh</surname>
<given-names>J.&#x20;L.</given-names>
</name>
<name>
<surname>Sher</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Casaccia-Bonnefil</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Boddeke</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Epigenetic Mechanisms Facilitating Oligodendrocyte Development, Maturation, and Aging</article-title>. <source>Glia.</source> <volume>57</volume>, <fpage>1579</fpage>&#x2013;<lpage>1587</lpage>. <pub-id pub-id-type="doi">10.1002/glia.20881</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dan Rosa de Jesus</surname>
<given-names>J.&#x20;C.</given-names>
</name>
<name>
<surname>Rivera</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Crivelli</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Capsule Networks for Protein Structure Classification and Prediction</source>. <comment>arXiv:1808.07475</comment>. </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Central Nervous System-specific Knockout of Brg1 Causes Growth Retardation and Neuronal Degeneration</article-title>. <source>Brain Res.</source> <volume>1622</volume>, <fpage>186</fpage>&#x2013;<lpage>195</lpage>. <pub-id pub-id-type="doi">10.1016/j.brainres.2015.06.027</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Condon</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>S. P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Interpretable Dimensionality Reduction of Single Cell Transcriptome Data with Deep Generative Models</article-title>. <source>Nat. Commun.</source> <volume>9</volume>, <fpage>2002</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-018-04368-5</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eraslan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Avsec</surname>
<given-names>&#x17d;.</given-names>
</name>
<name>
<surname>Gagneur</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Theis</surname>
<given-names>F. J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Deep Learning: New Computational Modelling Techniques for Genomics</article-title>. <source>Nat. Rev. Genet.</source> <volume>20</volume>, <fpage>389</fpage>&#x2013;<lpage>403</lpage>. <pub-id pub-id-type="doi">10.1038/s41576-019-0122-6</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Shang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Improving Protein Gamma-Turn Prediction Using Inception Capsule Networks</article-title>. <source>Sci. Rep.</source> <volume>8</volume>, <fpage>15741</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-018-34114-2</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Favot</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hall</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Haworth</surname>
<given-names>S. G.</given-names>
</name>
<name>
<surname>Kemp</surname>
<given-names>P. R.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Cytoplasmic YY1 Is Associated with Increased Smooth Muscle-specific Gene Expression</article-title>. <source>Am. J.&#x20;Pathol.</source> <volume>167</volume>, <fpage>1497</fpage>&#x2013;<lpage>1509</lpage>. <pub-id pub-id-type="doi">10.1016/s0002-9440(10)61236-9</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fukumoto</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Tamada</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Toya</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Nishino</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Yanagawa</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Takumi</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Identification of Genes Regulating GABAergic Interneuron Maturation</article-title>. <source>Neurosci. Res.</source> <volume>134</volume>, <fpage>18</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1016/j.neures.2017.11.010</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guti&#xe9;rrez</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Nikolic</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>van Dijk</surname>
<given-names>T. B.</given-names>
</name>
<name>
<surname>Hammad</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Vos</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Willart</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>Gata1 Regulates Dendritic-Cell Development and Survival</article-title>. <source>Blood</source> <volume>110</volume>, <fpage>1933</fpage>&#x2013;<lpage>1941</lpage>. <pub-id pub-id-type="doi">10.1182/blood-2006-09-048322</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fei</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Mapping the Mouse Cell Atlas by Microwell-Seq</article-title>. <source>Cell</source> <volume>173</volume>, <fpage>1307</fpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2018.05.012</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ionescu</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kozhemyakina</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Nicolae</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kaestner</surname>
<given-names>K. H.</given-names>
</name>
<name>
<surname>Olsen</surname>
<given-names>B. R.</given-names>
</name>
<name>
<surname>Lassar</surname>
<given-names>A. B.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>FoxA Family Members Are Crucial Regulators of the Hypertrophic Chondrocyte Differentiation Program</article-title>. <source>Dev. Cell</source> <volume>22</volume>, <fpage>927</fpage>&#x2013;<lpage>939</lpage>. <pub-id pub-id-type="doi">10.1016/j.devcel.2012.03.011</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Keshava Prasad</surname>
<given-names>T. S.</given-names>
</name>
<name>
<surname>Goel</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kandasamy</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Keerthikumar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mathivanan</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Human Protein Reference Database&#x2014;2009 Update</article-title>. <source>Nucleic Acids Res.</source> <volume>37</volume>, <fpage>D767</fpage>&#x2013;<lpage>D772</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkn892</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>C. T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>X. B.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>The Transcription Factor Cux1 Regulates Dendritic Morphology of Cortical Pyramidal Neurons</article-title>. <source>PLoS One</source> <volume>5</volume>, <fpage>e10596</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0010596</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Czubryt</surname>
<given-names>M. P.</given-names>
</name>
<name>
<surname>McAnally</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bassel-Duby</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Richardson</surname>
<given-names>J.&#x20;A.</given-names>
</name>
<name>
<surname>Wiebel</surname>
<given-names>F. F.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>Requirement for Serum Response Factor for Skeletal Muscle Growth and Maturation Revealed by Tissue-specific Gene Deletion in Mice</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>102</volume>, <fpage>1082</fpage>&#x2013;<lpage>1087</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0409103102</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jain</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Bar-Joseph</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Using Neural Networks for Reducing the Dimensions of Single-Cell RNA-Seq Data</article-title>. <source>Nucleic Acids Res.</source> <volume>45</volume>, <fpage>e156</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx681</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lopez</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Regier</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cole</surname>
<given-names>M. B.</given-names>
</name>
<name>
<surname>Jordan</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Yosef</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Deep Generative Modeling for Single-Cell Transcriptomics</article-title>. <source>Nat. Methods</source> <volume>15</volume>, <fpage>1053</fpage>&#x2013;<lpage>1058</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-018-0229-2</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Matsui</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sharma</surname>
<given-names>K. C.</given-names>
</name>
<name>
<surname>Cooke</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wakimoto</surname>
<given-names>B. T.</given-names>
</name>
<name>
<surname>Rasool</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hayworth</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Nuclear Structure and Chromosome Segregation in Drosophila Male Meiosis Depend on the Ubiquitin Ligase dTopors</article-title>. <source>Genetics</source> <volume>189</volume>, <fpage>779</fpage>&#x2013;<lpage>793</lpage>. <pub-id pub-id-type="doi">10.1534/genetics.111.133819</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Molnar</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2019</year>). <source>Interpretable Machine Learning</source>. </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oakes</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Hilton</surname>
<given-names>H. N.</given-names>
</name>
<name>
<surname>Ormandy</surname>
<given-names>C. J.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>The Alveolar Switch: Coordinating the Proliferative Cues and Cell Fate Decisions that Drive the Formation of Lobuloalveoli from Ductal Epithelium</article-title>. <source>Breast Cancer Res.</source> <volume>8</volume>, <fpage>207</fpage>&#x2013;<lpage>210</lpage>. <pub-id pub-id-type="doi">10.1186/bcr1411</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ohata</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yamauchi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Takeda</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Toda</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kamimura</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Motomura</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2000</year>). <article-title>RAR and RXR Expression by Kupffer Cells</article-title>. <source>Exp. Mol. Pathol.</source> <volume>68</volume>, <fpage>13</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1006/exmp.1999.2284</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Onodera</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Fujiwara</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Onishi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Itoh-Nakadai</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Okitsu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fukuhara</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>GATA2 Regulates Dendritic Cell Differentiation</article-title>. <source>Blood</source> <volume>128</volume>, <fpage>508</fpage>&#x2013;<lpage>518</lpage>. <pub-id pub-id-type="doi">10.1182/blood-2016-02-698118</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Remnest&#xe5;l</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <source>Expression and Distribution of Transcription Factors NPAS3 and RFX3 in Alzheimer&#x27;s Disease</source>. <publisher-name>KTH</publisher-name>. <comment>Skolan f&#xf6;r bioteknologi (BIO)</comment>. </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sabour</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Frosst</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G. E.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Dynamic Routing between Capsules</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>30</volume>, <fpage>3856</fpage>&#x2013;<lpage>3866</lpage>. </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schulz</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Devanny</surname>
<given-names>W. E.</given-names>
</name>
<name>
<surname>Gitter</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ernst</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bar-Joseph</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>DREM 2.0: Improved Reconstruction of Dynamic Regulatory Networks from Time-Series Expression Data</article-title>. <source>BMC Syst. Biol.</source> <volume>6</volume>, <fpage>104</fpage>. <pub-id pub-id-type="doi">10.1186/1752-0509-6-104</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smallwood</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Angermueller</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Krueger</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Saadeh</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Peat</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Single-cell Genome-wide Bisulfite Sequencing for Assessing Epigenetic Heterogeneity</article-title>. <source>Nat. Methods</source> <volume>11</volume>, <fpage>817</fpage>&#x2013;<lpage>820</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.3035</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stark</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Breitkreutz</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Reguly</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Boucher</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Breitkreutz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tyers</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>BioGRID: a General Repository for Interaction Datasets</article-title>. <source>Nucleic Acids Res.</source> <volume>34</volume>, <fpage>D535</fpage>&#x2013;<lpage>D539</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkj109</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tomczak</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Czerwi&#x144;ska</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wiznerowicz</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>The Cancer Genome Atlas (TCGA): an Immeasurable Source of Knowledge</article-title>. <source>Contemp. Oncol. (Pozn).</source> <volume>19</volume>, <fpage>A68</fpage>&#x2013;<lpage>A77</lpage>. <pub-id pub-id-type="doi">10.5114/wo.2014.47136</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ulven</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Natarajan</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Holven</surname>
<given-names>K. B.</given-names>
</name>
<name>
<surname>L&#xf8;vdal</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Berg</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Blomhoff</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>Expression of Retinoic Acid Receptor and Retinoid X Receptor Subtypes in Rat Liver Cells: Implications for Retinoid Signalling in Parenchymal, Endothelial, Kupffer and Stellate Cells</article-title>. <source>Eur. J.&#x20;Cell Biol.</source> <volume>77</volume>, <fpage>111</fpage>&#x2013;<lpage>116</lpage>. <pub-id pub-id-type="doi">10.1016/s0171-9335(98)80078-2</pub-id> </citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Nie</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xin</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>An Interpretable Deep-Learning Architecture of Capsule Networks for Identifying Cell-type Gene Expression Programs from Single-Cell RNA-Sequencing Data</article-title>. <source>Nat. Mach. Intell.</source> <volume>2</volume>, <fpage>693</fpage>&#x2013;<lpage>703</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-020-00244-4</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeisel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mu&#xf1;oz-Manchado</surname>
<given-names>A. B.</given-names>
</name>
<name>
<surname>Codeluppi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>L&#xf6;nnerberg</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>La Manno</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Jur&#xe9;us</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Cell Types in the Mouse Cortex and Hippocampus Revealed by Single-Cell RNA-Seq</article-title>. <source>Science</source> <volume>347</volume>, <fpage>1138</fpage>&#x2013;<lpage>1142</lpage>. <pub-id pub-id-type="doi">10.1126/science.aaa1934</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Theesfeld</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Troyanskaya</surname>
<given-names>O. G.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Deep Learning Sequence-Based Ab Initio Prediction of Variant Effects on Expression and Disease Risk</article-title>. <source>Nat. Genet.</source> <volume>50</volume>, <fpage>1171</fpage>&#x2013;<lpage>1179</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-018-0160-6</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>