<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1122559</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2023.1122559</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioinformatics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Orthogonal analysis of variants in APOE gene using <italic>in-silico</italic> approaches reveals novel disrupting variants</article-title>
<alt-title alt-title-type="left-running-head">Li et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbinf.2023.1122559">10.3389/fbinf.2023.1122559</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Chang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1433917/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hou</surname>
<given-names>Ian</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ma</surname>
<given-names>Mingjia</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Grace</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Bai</surname>
<given-names>Yongsheng</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1524127/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liu</surname>
<given-names>Xiaoming</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1235809/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>USF Genomics and College of Public Health</institution>, <institution>University of South Florida</institution>, <addr-line>Tampa</addr-line>, <addr-line>FL</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>The John Cooper School</institution>, <addr-line>The Woodlands</addr-line>, <addr-line>TX</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Novi High School</institution>, <addr-line>Novi</addr-line>, <addr-line>MI</addr-line>, <country>United States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Del Norte High School</institution>, <addr-line>San Diego</addr-line>, <addr-line>CA</addr-line>, <country>United States</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Next-Gen Intelligent Science Training</institution>, <addr-line>Ann Arbor</addr-line>, <addr-line>MI</addr-line>, <country>United States</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Department of Biology</institution>, <institution>Eastern Michigan University</institution>, <addr-line>Ypsilanti</addr-line>, <addr-line>MI</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1213731/overview">Andrzej Kloczkowski</ext-link>, The Research Institute at Nationwide Children&#x2019;s Hospital, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1633670/overview">Uday S. Evani</ext-link>, New York Genome Center, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2240872/overview">Minita Shah</ext-link>, Research Laboratories Merck, United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Chang Li, <email>lic@usf.edu</email>; Yongsheng Bai, <email>bioinformaticsresearchtomorrow@gmail.com</email>; Xiaoming Liu, <email>xiaomingliu@usf.edu</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Protein Bioinformatics, a section of the journal Frontiers in Bioinformatics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>06</day>
<month>04</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>3</volume>
<elocation-id>1122559</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>12</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>03</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Li, Hou, Ma, Wang, Bai and Liu.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Li, Hou, Ma, Wang, Bai and Liu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<bold>Introduction:</bold> Alzheimer&#x2019;s disease (AD) is one of the most prominent medical conditions in the world. Understanding the genetic component of the disease can greatly advance our knowledge regarding its progression, treatment and prognosis. Single amino-acid variants (SAVs) in the APOE gene have been widely investigated as a risk factor for AD. Studies, including genome-wide association studies, meta-analysis based studies, and <italic>in-vivo</italic> animal studies, were carried out to investigate the functional importance and pathogenesis potential of APOE SAVs. However, given the high cost of such large-scale or experimental studies, there are only a handful of variants being reported that have definite explanations. The recent development of <italic>in-silico</italic> analytical approaches, especially large-scale deep learning models, has opened new opportunities for us to probe the structural and functional importance of APOE variants extensively.</p>
<p>
<bold>Method:</bold> In this study, we are taking an ensemble approach that simultaneously uses large-scale protein sequence-based models, including Evolutionary Scale Model and AlphaFold, together with a few <italic>in-silico</italic> functional prediction web services to investigate the known and possibly disease-causing SAVs in APOE and evaluate their likelihood of being functional and structurally disruptive.</p>
<p>
<bold>Results:</bold> As a result, using an ensemble approach with little to no prior field-specific knowledge, we reported 5 SAVs in APOE gene to be potentially disruptive, one of which (C112R) was classified by previous studies as a key risk factor for AD.</p>
<p>
<bold>Discussion:</bold> Our study provided a novel framework to analyze and prioritize the functional and structural importance of SAVs for future experimental and functional validation.</p>
</abstract>
<kwd-group>
<kwd>AlphaFold</kwd>
<kwd>missense variant</kwd>
<kwd>APOE</kwd>
<kwd>Alzheimer&#x2019;s disease</kwd>
<kwd>deep learning</kwd>
<kwd>ensemble</kwd>
</kwd-group>
<contract-sponsor id="cn001">University of South Florida<named-content content-type="fundref-id">10.13039/100008900</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Alzheimer&#x2019;s disease (AD), a complex disease with a known genetic basis, is the most prominent cause of dementia in the elderly (<xref ref-type="bibr" rid="B2">Bettens et al., 2013</xref>). Understanding the genetic component of AD can be of great importance in its early diagnosis, effective treatment and improved prognosis. It has been widely studied and reported that the apolipoprotein E (APOE) gene, which is a key gene for lipid transportation, is closely associated with the risk of AD (<xref ref-type="bibr" rid="B14">Kamboh et al., 1995</xref>; <xref ref-type="bibr" rid="B35">Yamazaki et al., 2019</xref>; <xref ref-type="bibr" rid="B25">Martens et al., 2022</xref>). APOE gene has 3 different protein isoforms, namely, APOE2, APOE3, and APOE4 (<xref ref-type="bibr" rid="B11">Husain et al., 2021</xref>). These isoforms differ by two amino acids, APOE2 with Cys112 and Cys158, APOE3 with Cys112 and Arg158, and APOE4 with Arg112 and Arg158. APOE3 was considered the reference isoform and the APOE4 Cys112Arg variant was a strong risk factor for AD, while APOE2 Arg158Cys variant was reported to be protective (<xref ref-type="bibr" rid="B3">Bojanowski et al., 2006</xref>; <xref ref-type="bibr" rid="B9">Dolai et al., 2020</xref>). Given the functional importance and pathogenesis potential of APOE variants, many experimental studies using animal models, genome-wide association studies, and other meta-analyses have been performed to interrogate the impact of variants residing in the APOE gene (<xref ref-type="bibr" rid="B1">Bertram et al., 2008</xref>; <xref ref-type="bibr" rid="B23">Liu et al., 2014</xref>; <xref ref-type="bibr" rid="B19">Lewandowski et al., 2020</xref>). However, given the high cost of such large-scale or experimental studies, there are only a handful of variants being reported that have definite explanations.</p>
<p>The recent development of <italic>in silico</italic> analytical approaches, especially large-scale deep learning models, has opened new opportunities for us to probe the structural and functional importance of APOE variants extensively. Specifically, AlphaFold (<xref ref-type="bibr" rid="B13">Jumper et al., 2021</xref>), which exploited attention mechanisms from language modeling and multiple sequence alignment (MSA) data of protein homologs, has provided substantially increased coverage of high-confidence protein structure predictions. Additionally, the Evolutionary Scale Model (ESM) (<xref ref-type="bibr" rid="B20">Lin et al., 2022</xref>), which was pre-trained on 250 million protein sequences, has proven to be able to extract key functional domains and evaluate the functional importance of amino acid variants (<xref ref-type="bibr" rid="B4">Brandes et al., 2022</xref>) even in the absence of multiple sequence alignment (MSA) data which were required in AlphaFold modeling. Recent studies have tried to examine the ability of these tools individually to evaluate the impact of single amino-acid variants (SAVs), but reported conflicting results (<xref ref-type="bibr" rid="B28">Pak et al., 2021</xref>; <xref ref-type="bibr" rid="B6">Caswell et al., 2022</xref>). In this study, instead of using these tools separately, we are taking an ensemble approach that simultaneously uses these two large-scale protein sequence-based models together with a few <italic>in silico</italic> functional prediction web services to investigate the known and possibly disease-causing variants in APOE and evaluate their likelihood of being functional and structurally disruptive.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 APOE sequence data retrieval</title>
<p>The protein sequence of the APOE gene was retrieved from Ensembl genome browser v107 in FASTA format (<ext-link ext-link-type="uri" xlink:href="https://useast.ensembl.org/index.html">https://useast.ensembl.org/index.html</ext-link>). Python package Biopython was used to load and process the retrieved sequence. Only the reference isoform (APOE3) and the precursor APOE (pre-APOE) sequences were used in this study. The difference between pre-APOE and mature APOE was the addition of an 18-residue signal peptide at the beginning of the sequence. As a result, previously reported variants with respect to mature APOE, such as C112R and R158C, were reported as C130R and R176C, respectively, in this study.</p>
<p>The C130R variant was manually introduced to create a separate sequence representing APOE4, and R176C was manually introduced to create a separate sequence representing APOE2.</p>
</sec>
<sec id="s2-2">
<title>2.2 ESM model retrieval and variant effect prediction</title>
<p>ESM-1b model was retrieved from GitHub (<ext-link ext-link-type="uri" xlink:href="https://github.com/facebookresearch/esm">https://github.com/facebookresearch/esm</ext-link>) using PyTorch Hub. The same tokenizer as the original ESM model was used to encode input protein sequences. The variant effect for each amino acid variant (ESM score) was calculated as the log-likelihood ratio between the variant and the corresponding reference amino acid. To show a positive score, we multiplied each prediction score by &#x2212;1.<disp-formula id="equ1">
<mml:math id="m1">
<mml:mrow>
<mml:mi mathvariant="bold-italic">E</mml:mi>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi mathvariant="bold-italic">M</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">e</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>The variant was predicted to be more damaging if it had a higher ESM score.</p>
</sec>
<sec id="s2-3">
<title>2.3 AlphaFold model retrieval and variant effect prediction</title>
<p>AlphaFold v2 model was run locally using a third-party implementation, namely, LocalColabFold (<ext-link ext-link-type="uri" xlink:href="https://github.com/YoshitakaMo/localcolabfold">https://github.com/YoshitakaMo/localcolabfold</ext-link>) (<xref ref-type="bibr" rid="B13">Jumper et al., 2021</xref>; <xref ref-type="bibr" rid="B26">Mirdita et al., 2022</xref>). The algorithm first implements MMseqs2 (<xref ref-type="bibr" rid="B33">Steinegger and S&#xf6;ding, 2017</xref>) to retrieve MSA for the target protein. Then, it predicts the 3D protein conformation for the given sequence.</p>
<p>Due to the high computational cost of running AlphaFold, it was extremely time-consuming to run predictions (<italic>in silico</italic> mutagenesis) for all possible SAVs in APOE, which would require running AlphaFold 6,023 times (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>). As a workaround, we retrieved all SAVs in APOE reported in ClinVar (<xref ref-type="bibr" rid="B17">Landrum et al., 2015</xref>). First, ClinVar database version 20220507 was downloaded from <ext-link ext-link-type="uri" xlink:href="https://ftp.ncbi.nlm.nih.gov/pub/clinvar/">https://ftp.ncbi.nlm.nih.gov/pub/clinvar/</ext-link>. Second, only variants annotated as inside the APOE gene were kept (<italic>n</italic> &#x3d; 69). Third, only non-synonymous single nucleotide variants were kept, and all insertions and deletions were excluded (<italic>n</italic> &#x3d; 38). As a result, a total of 38 SAVs were retrieved, and a separate protein sequence was created for each SAV. The predicted 3D protein structure for the wild-type and each mutant sequence was compared using the root-mean-square deviation (RMSD) of atomic positions, which was commonly used as a distance measurement between two protein structures. A variant with a higher RMSD score was expected to have a greater impact on the protein structure. Therefore, the RMSD score was used as a surrogate for AlphaFold&#x2019;s prediction of the variant&#x2019;s impact.</p>
</sec>
<sec id="s2-4">
<title>2.4 Missense3D and DynaMut2 web service tools</title>
<p>Besides the two computational tools described previously, we used two additional web services to measure/predict the stability of the protein with and without the variants. First, the Missense3D database for APOE was retrieved from <ext-link ext-link-type="uri" xlink:href="http://missense3d.bc.ic.ac.uk:8080">http://missense3d.bc.ic.ac.uk:8080</ext-link> (<xref ref-type="bibr" rid="B15">Khanna et al., 2021</xref>), which contains 307 pre-calculated predictions in APOE. Second, DynaMut2 was used to predict the effects of user-supplied variants (<xref ref-type="bibr" rid="B31">Rodrigues et al., 2020</xref>). The same SAVs retrieved from ClinVar were used and submitted to the DynaMut2 web service at: <ext-link ext-link-type="uri" xlink:href="https://biosig.lab.uq.edu.au/dynamut2/">https://biosig.lab.uq.edu.au/dynamut2/</ext-link>.</p>
</sec>
<sec id="s2-5">
<title>2.5 Retrieval of additional annotations</title>
<p>To evaluate the performance of the main predictor (ESM-1b model), we retrieved population allele frequencies from gnomAD (<ext-link ext-link-type="uri" xlink:href="https://gnomad.broadinstitute.org/news/2020-10-gnomad-v3-1/">https://gnomad.broadinstitute.org/news/2020-10-gnomad-v3-1/</ext-link>). Maximum population frequencies were retrieved for the same SAVs retrieved from ClinVar as described previously.</p>
<p>An evolutionary conservation score, GERP&#x2b;&#x2b; (<xref ref-type="bibr" rid="B8">Davydov et al., 2010</xref>), was retrieved from the dbNSFP v4.3a database (<xref ref-type="bibr" rid="B21">Liu et al., 2011</xref>; <xref ref-type="bibr" rid="B22">Liu et al., 2020</xref>), available at <ext-link ext-link-type="uri" xlink:href="https://sites.google.com/site/jpopgen/dbNSFP">https://sites.google.com/site/jpopgen/dbNSFP</ext-link>.</p>
<p>Additionally, we have retrieved 3 popular tools for predicting protein stability change upon mutation, namely, FoldX (<xref ref-type="bibr" rid="B32">Schymkowitz et al., 2005</xref>), DDGun (<xref ref-type="bibr" rid="B27">Montanucci et al., 2019</xref>) and Maestro (<xref ref-type="bibr" rid="B16">Laimer et al., 2015</xref>). First, FoldX was downloaded from <ext-link ext-link-type="uri" xlink:href="https://foldxsuite.crg.eu/">https://foldxsuite.crg.eu/</ext-link> using the academic license. The &#x201c;Stability&#x201d; command was used to calculate the Gibbs energy of protein folding for all 38 potential SAVs. The difference in folding energy between wild-type and mutant sequences was calculated and their absolute values were used to represent each SAV&#x2019;s impact predicted by FoldX, since both stabilizing and destabilizing mutations may all have substantial impacts on the function of the protein. Second, the DDGun web service, available at: <ext-link ext-link-type="uri" xlink:href="https://folding.biofold.org/ddgun/index.html">https://folding.biofold.org/ddgun/index.html</ext-link>, was used to make predictions on protein stability change given a list of mutations. Specifically, the wild-type sequence of APOE with a list of IDs for all 38 SAVs was uploaded. A global Delta Delta G (DDG) value was predicted for each of the SAVs, and its absolute value was used to represent each SAV&#x2019;s impact predicted by DDGun. Third, Maestro v1.2.35 Linux executable file was downloaded from <ext-link ext-link-type="uri" xlink:href="https://pbwww.services.came.sbg.ac.at/?page_id=477">https://pbwww.services.came.sbg.ac.at/?page_id&#x3d;477</ext-link>. All 38 SAVs were submitted as input for the Maestro program with the wild-type 3D structure of APOE obtained using AlphaFold2. Similarly, the DDG values were obtained from the prediction and their absolute values were used to represent each SAV&#x2019;s impact predicted by Maestro.</p>
</sec>
<sec id="s2-6">
<title>2.6 Statistical tests and visualizations</title>
<p>To evaluate the correlation between allele frequencies of the variants and predictions made by computational tools, Pearson&#x2019;s correlation coefficient was calculated using the Python library SciPy (<ext-link ext-link-type="uri" xlink:href="https://scipy.org/">https://scipy.org/</ext-link>). We calculated the area under the receiver operating characteristic curve (auROC) and average precision scores to evaluate each predictor&#x2019;s ability in prioritizing potential clinically relevant variants. Specifically, an auROC was calculated by measuring the predictor&#x2019;s true positive rate (TPR) and false positive rate (FPR) using different score cutoffs. Similarly, the average precision (AP) score was calculated by measuring the predictor&#x2019;s precision and recall (same as TPR) using different score cutoffs. The formulas for calculating TPR, FPR, and precision are:<disp-formula id="equ2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>Where TP refers to the number of true positives (correctly predicted ClinVar pathogenic variants), FN refers to the number of false negatives (incorrectly predicted ClinVar pathogenic variants as benign), FP refers to the number of false positives (incorrectly predicted ClinVar benign variants as pathogenic), and TN refers to the number of true negatives (correctly predicted ClinVar benign variants). Both auROC and AP scores were calculated using the Python library sklearn with functions <italic>roc_auc_score</italic> and <italic>average_precision_score</italic>, respectively.</p>
<p>Additionally, PyTorch was used to calculate ESM model predictions, and Tensorflow was used to calculate AlphaFold model predictions.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 ESM-1b model can predict regions with high importance</title>
<p>As illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, the entire length of the APOE protein was predicted by the ESM-1b model, and all potential amino acid variants were evaluated as the log odds ratio between the mutant and wild-type predictions. Variants with lighter colors indicate a low predicted likelihood of the existence of a variant at this position, which implies their functional importance. Key functional domains, including a signal peptide, receptor binding domain and lipid binding domain showed higher importance, as illustrated by light color bands. Interestingly, these regions of high importance showed higher conservation scores (GERP&#x2b;&#x2b; score), as illustrated by the top panel. In contrast, amino acids from positions 18&#x2013;45 showed both low conservation and low predicted functional importance. This observed concordance of the ESM prediction with annotated functional domains and evolutionary conservation demonstrated the model&#x2019;s ability to capture important regions in the APOE gene, given that the gene is only moderately conserved and is quite tolerant to missense variants (<xref ref-type="bibr" rid="B18">Lek et al., 2016</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>ESM-1b and GERP predicted functional importance scores for all potential SAVs in APOE gene. <bold>(A)</bold> GERP conservation scores for APOE gene. <bold>(B)</bold> functional domains for APOE gene. <bold>(C)</bold> ESM-1b <italic>in silico</italic> mutagenesis predictions for APOE gene.</p>
</caption>
<graphic xlink:href="fbinf-03-1122559-g001.tif"/>
</fig>
<p>Additionally, multiple clustering patterns were observed in the prediction heatmap, as illustrated by regions with high predicted values. One of these regions was amino acids 1&#x2013;18, representing the signal peptide region. While few studies have tried to evaluate the functional importance of variants residing in this region, it is clear that multiple variants can be extremely harmful to the protein&#x2019;s function.</p>
</sec>
<sec id="s3-2">
<title>3.2 ESM-1b model can identify variants of high functional importance in the population</title>
<p>To assess whether the scores predicted by ESM-1b truly reflect functional importance at the variant level, we next evaluated allele frequencies observed in a large-scale population cohort, namely, gnomAD, to see whether the model&#x2019;s predictions correlate with allele frequencies (AFs) of the variants in general populations. Due to purifying selection, variants with lower AFs are more likely to be deleterious, whereas variants with higher AFs are more likely to be tolerated (benign). As illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>, predictions by ESM-1b showed statistically significant positive correlations with &#x2212;log<sub>10</sub>(AFs) in the general population, which indicates its capability of identifying truly functional variants that have undergone purifying selection.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Correlation between ESM-1b predictions and population allele frequencies among ClinVar reported variants in APOE. Pearson correlation coefficient and associated <italic>p</italic>-value were reported.</p>
</caption>
<graphic xlink:href="fbinf-03-1122559-g002.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>3.3 AlphaFold&#x2019;s predictions correlate with evolutionary conservation</title>
<p>Next, we investigated the predictions made by AlphaFold. AlphaFold gives a per-residue confidence metric called pLDDT (predicted Local Distance Difference Test) score for all alpha-carbon atoms (<xref ref-type="bibr" rid="B24">Mariani et al., 2013</xref>). Regions with high pLDDT scores usually have fewer clashes and structural violations. As shown in <xref ref-type="fig" rid="F3">Figure 3</xref>, pLDDT scores correlate with conservation scores (Spearman correlation coefficient &#x3d; 0.43, <italic>p</italic>-value &#x3d; 1.94 &#xd7; 10<sup>&#x2212;15</sup>), which was expected, as AlphaFold prediction relies on MSA data as input, which primarily utilizes conservation data. Additionally, in regions with high pLDDT scores (pLDDT &#x3e;70), for example, approximately amino acid positions 45&#x2013;170, only pathogenic variants and no benign variants were reported. Their potential structural importance could explain this observed pattern.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The pLDDT scores for AlphaFold predicted APOE structure. <bold>(A)</bold> GERP conservation score. <bold>(B)</bold> pLDDT scores along APOE protein sequence. ClinVar pathogenic/benign variants were highlighted in red and green, respectively. &#x201c;All variants&#x201d; refers to all amino acids in APOE and reflects the pLDDT distribution for all amino acids of APOE.</p>
</caption>
<graphic xlink:href="fbinf-03-1122559-g003.tif"/>
</fig>
</sec>
<sec id="s3-4">
<title>3.4 Orthogonal tools show low pairwise correlations</title>
<p>We compared the correlation of the predictions made by four popular computational frameworks, namely, ESM-1b, AlphaFold, Missense3D, and DynaMut2, which measure protein properties from different perspectives (<xref ref-type="fig" rid="F4">Figure 4</xref>). Specifically, ESM model studies comprehensive protein sequence features from millions of protein sequences using a language model. AlphaFold model studies protein sequences and tries to predict 3D protein structures using sequences and available templates. Missense3D adopts a bioinformatics pipeline and evaluates a wide range of structural impacts of an SAV. DynaMut2 model predicts protein stability by learning a series of biochemical and biophysical features from the target proteins. We have examined additional popular <italic>in silico</italic> tools that can predict protein stability (<xref ref-type="bibr" rid="B5">Caldararu et al., 2020</xref>; <xref ref-type="bibr" rid="B29">Pan et al., 2022</xref>), including FoldX (<xref ref-type="bibr" rid="B32">Schymkowitz et al., 2005</xref>), DDGun (<xref ref-type="bibr" rid="B27">Montanucci et al., 2019</xref>) and Maestro (<xref ref-type="bibr" rid="B16">Laimer et al., 2015</xref>), but all of them showed inferior performance compared to DynaMut2 in APOE (<xref ref-type="sec" rid="s10">Supplementary Figures S1, S2</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). Therefore, we chose only DynaMut2 as the representative tool for protein stability prediction. Interestingly, benchmarked using ClinVar labels, DynaMut2, as the best individual predictor among protein stability methods, outperformed predictors from other categories, including ESM and AlphaFold. Therefore, we provided predictions from DynaMut2 for all possible SAVs in APOE in <xref ref-type="sec" rid="s10">Supplementary Table S3</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Pairwise correlation of the variants between the four <italic>in silico</italic> predictors.</p>
</caption>
<graphic xlink:href="fbinf-03-1122559-g004.tif"/>
</fig>
<p>All four selected tools showed no or very weak correlations with each other, which can be both concerning and useful. On one hand, if the inconsistency arises from methodological flaws, their ability to capture useful information is extremely limited. Users should be cautious when adopting these tools in their workflow. On the other hand, if this inconsistency arises from the differences in methodological preferences and their ability to capture different aspects of protein functions, then these tools can provide valuable orthogonal information.</p>
</sec>
<sec id="s3-5">
<title>3.5 Ensemble of multiple tools can provide biologically meaningful insights</title>
<p>To evaluate the usefulness of the previously described tools and illustrate if their low correlation can be beneficial to explaining variant effects, we obtained top candidates from multiple predictions and examined their biological relevance as a means of validation. The top candidates were obtained based on the predictions made by each of the four tools. We consider variants to be potentially pathogenic if predictions from two or more tools were indicative of a disruptive effect. Using this ensemble approach (majority vote), five candidate variants were obtained. As shown in <xref ref-type="table" rid="T1">Table 1</xref>, variants C130R, R163C, and R132C are most likely to be functional. Importantly, DynaMut2, which predicts the stability of the mutant protein sequence, showed destabilizing effects for all these 3 variants. AlphaFold models also predict the top 2 variants to disrupt the key functional domains. Interestingly, the most promising variant, C130R, is the variant that separates the transcript that carries the variant gene (APOE4) from the wild-type transcript (APOE3). The variant replaces Cysteine with Arginine, which was predicted to change a residue state from buried to exposed (<xref ref-type="fig" rid="F5">Figure 5</xref>). The functional importance of the C130R variant was validated by previous studies, which reported the variant to be associated with an elevated risk of AD (<xref ref-type="bibr" rid="B11">Husain et al., 2021</xref>; <xref ref-type="bibr" rid="B25">Martens et al., 2022</xref>). This observation highlighted the ability to combine multiple functional prediction tools in finding key functional variants.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Five candidate variants that affect APOE function.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Variant</th>
<th align="center">Allele frequency</th>
<th align="center">ESM</th>
<th align="center">DynaMut</th>
<th align="center">Missense3D</th>
<th align="center">AlphaFold</th>
<th align="center">Evidence count</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">C130R</td>
<td align="center">0.138</td>
<td align="center">0</td>
<td align="center">
<bold>&#x2212;1.07</bold>
</td>
<td align="center">
<bold>1</bold>
</td>
<td align="center">
<bold>33.813</bold>
</td>
<td align="center">3</td>
</tr>
<tr>
<td align="center">R163C</td>
<td align="center">0.001</td>
<td align="center">
<bold>9.631</bold>
</td>
<td align="center">
<bold>&#x2212;0.86</bold>
</td>
<td align="center">0</td>
<td align="center">
<bold>8.305</bold>
</td>
<td align="center">3</td>
</tr>
<tr>
<td align="center">R132C</td>
<td align="center">0.00003</td>
<td align="center">
<bold>8.259</bold>
</td>
<td align="center">
<bold>&#x2212;0.77</bold>
</td>
<td align="center">
<bold>1</bold>
</td>
<td align="center">5.664</td>
<td align="center">3</td>
</tr>
<tr>
<td align="center">R163P</td>
<td align="center">NA</td>
<td align="center">
<bold>10.296</bold>
</td>
<td align="center">&#x2212;0.48</td>
<td align="center">0</td>
<td align="center">
<bold>9.320</bold>
</td>
<td align="center">2</td>
</tr>
<tr>
<td align="center">R160C</td>
<td align="center">NA</td>
<td align="center">
<bold>9.571</bold>
</td>
<td align="center">&#x2212;0.02</td>
<td align="center">0</td>
<td align="center">
<bold>9.113</bold>
</td>
<td align="center">2</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x2a;ESM cutoff &#x3d; 7.97 (top 25 percentile); DynaMut cutoff &#x3d; &#x2212;0.5; Missense3D &#x3d; 1 (damaging); AlphaFold cutoff &#x3d; 6.42 (top 25 percentile). &#x2a; specifies which cutoff value was used for each of these predictors to decide whether its predictions for the variants are damaging (functional) or not (non-functional).</p>
</fn>
</table-wrap-foot>
<table-wrap-foot>
<fn>
<p>The bold values mean that the individual predictor&#x2019;s prediction for that specific variant is damaging (functional).</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Predicted change in residue properties between Cysteine (green) and Arginine (red) in C130R variant by Missense3D.</p>
</caption>
<graphic xlink:href="fbinf-03-1122559-g005.tif"/>
</fig>
<p>Next, using a similar approach, we identified one variant to be potentially benign. As shown in <xref ref-type="table" rid="T2">Table 2</xref>, all four tools predicted the variant to be non-functional. ClinVar reported a conflicting interpretation of pathogenicity for this SAV, meaning that multiple clinical laboratories reported contradictory interpretations for the same variant. Specifically, some studies reported it to be benign while others reported it to be of uncertain significance, according to the 2015 ACMG-AMP guidelines (<xref ref-type="bibr" rid="B30">Richards et al., 2015</xref>). Given its previous uncertain annotations and the fact that all orthogonal <italic>in silico</italic> methods showed concordant prediction, its function is worth investigating in future studies to confirm whether the SAV is truly benign. All calculated scores for all 38 SAVs analyzed in the study are provided in <xref ref-type="sec" rid="s10">Supplementary Table S2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Candidate variant predicted to be benign by all tools.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Variant</th>
<th align="center">Allele frequency</th>
<th align="center">ESM</th>
<th align="center">DynaMut</th>
<th align="center">Missense3D</th>
<th align="center">AlphaFold</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">L46P</td>
<td align="center">0.0025</td>
<td align="center">3.106</td>
<td align="center">&#x2212;0.09</td>
<td align="center">0</td>
<td align="center">3.564</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>In this study, we explored the usefulness of various orthogonal <italic>in silico</italic> predictors in their ability to prioritize functionally and structurally disruptive SAVs in the APOE gene. Using little to no prior knowledge, we identified 5 potentially disrupting variants, one of which (C130R) was classified by previous studies as a key risk factor for AD (<xref ref-type="bibr" rid="B10">Holtzman et al., 2012</xref>).</p>
<p>As illustrated by our study, the ESM model, which utilized large-scale pretraining and state-of-the-art deep learning architectures, can efficiently identify highly important domains and functional SAVs. The N-terminal of the APOE protein consists of 4 helices, H1, H2, H3, and H4, which form a four-helix bundle that spans amino acids from 42 to 182 (<xref ref-type="bibr" rid="B34">Wilson et al., 1991</xref>). These helices contain some key functional domains, such as the LDL-receptor binding region (residues 154&#x2013;168). As illustrated by the ESM prediction (<xref ref-type="fig" rid="F1">Figure 1</xref>), this region indeed contains multiple highlighted bands, reflecting the potential functional importance of the variants. Moreover, the previously mentioned domain for the signal peptide (residues 1&#x2013;18) represents another region of interest. It has been previously reported that variants located in signal-peptide-encoding sequences may severely impact protein transportation (<xref ref-type="bibr" rid="B12">Jarjanazi et al., 2007</xref>). For this under-investigated region, no variant was reported in ClinVar, including benign, pathogenic, or variant of unknown significance (VUS), which calls for future studies to perform functional validation of variants in this region that focus on the transportation and maturation of APOE.</p>
<p>However, the ESM model was imperfect, and it may fail to predict variants residing in regions with little homologous coverage. For example, in our study, the ESM model incorrectly predicted the C130R variant to be non-functional. On the other hand, the AlphaFold model has demonstrated potential in identifying such highly disruptive SAVs. While the C130R variant was predicted as non-functional by the ESM model, it showed the highest disruptive effect predicted by AlphaFold among the top 5 candidate SAVs. Based on our results from non-specific <italic>in silico</italic> predictions, this C130R variant may convey its functional impact through altered protein 3D structure rather than the function encoded in the underlying amino acid. Indeed, this C130R variant, or equivalently C112R in mature APOE, was reported to destabilize the protein structure, which was considered to improve its ability to bind to lipid and amyloid-&#x3b2; surfaces, which may ultimately increase the risk of AD (<xref ref-type="bibr" rid="B7">Chetty et al., 2017</xref>).</p>
<p>Aside from the promising results of using a set of orthogonal <italic>in silico</italic> tools to help us understand the functional importance of APOE variants, we believe there are a few limitations in our study that future studies could improve upon. First, our illustration and analysis in this study were based only on a single gene APOE, and future studies may include other apolipoprotein genes to investigate the capability of these novel computational tools in assisting lipid research. Second, we only considered SAVs in this study, and we note that InDels (short insertions or deletions) may play a greater role in protein stability and function. It is still an open question regarding if and how these existing computational tools can help with this regard. Third, in this study, we performed validation across multiple data resources, including conservation score and population allele frequency, and future studies may be conducted to include additional <italic>in silico</italic> validations and even experimental validations, such as deep mutational scanning data, to further elucidate the functional importance of the reported variants.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>XL, YB, and CL conceived the idea. CL, IH, and MM performed the formal analysis. CL, IH, MM, and GW wrote the manuscript. XL and YB edited the manuscript and supervised the findings of this work.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>Funding for the article processing charge was provided by the University of South Florida to XL.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fbinf.2023.1122559/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fbinf.2023.1122559/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material>
<label>SUPPLEMENTARY FIGURE S1</label>
<caption>
<p>Performances of all quantitative predictors analyzed in this study benchmarked using ClinVar labels. The dashed red line represents the ROC cut-off value. Tools with greater ROC values were chosen to construct the ensemble.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>SUPPLEMENTARY FIGURE S2</label>
<caption>
<p>ROC curves for all quantitative predictors analyzed in this study benchmarked using ClinVar labels.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Table3.CSV" id="SM1" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image2.PDF" id="SM2" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table2.CSV" id="SM3" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.CSV" id="SM4" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image1.PDF" id="SM5" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bertram</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lange</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mullin</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Parkinson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hsiao</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hogan</surname>
<given-names>M. F.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>Genome-wide association analysis reveals putative alzheimer&#x27;s disease susceptibility loci in addition to APOE</article-title>. <source>Am. J. Hum. Genet.</source> <volume>83</volume> (<issue>5</issue>), <fpage>623</fpage>&#x2013;<lpage>632</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2008.10.008</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bettens</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Sleegers</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Van broeckhoven</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Genetic insights in alzheimer&#x27;s disease</article-title>. <source>Lancet Neurology</source> <volume>12</volume> (<issue>1</issue>), <fpage>92</fpage>&#x2013;<lpage>104</lpage>. <pub-id pub-id-type="doi">10.1016/S1474-4422(12)70259-4</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bojanowski</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Chew</surname>
<given-names>E. Y.</given-names>
</name>
<name>
<surname>Ning</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Csaky</surname>
<given-names>K. G.</given-names>
</name>
<name>
<surname>Green</surname>
<given-names>W. R.</given-names>
</name>
<etal/>
</person-group> (<year>2006</year>). <article-title>An apolipoprotein E variant may protect against age-related macular degeneration through cytokine regulation</article-title>. <source>Environ. Mol. Mutagen.</source> <volume>47</volume> (<issue>8</issue>), <fpage>594</fpage>&#x2013;<lpage>602</lpage>. <pub-id pub-id-type="doi">10.1002/em.20233</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brandes</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Goldman</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C. H.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Ntranos</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Genome-wide prediction of disease variants with a deep protein language model</article-title>. <source>bioRxiv</source>. <pub-id pub-id-type="doi">10.1101/2022.08.25.505311</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Caldararu</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Mehra</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Blundell</surname>
<given-names>T. L.</given-names>
</name>
<name>
<surname>Kepp</surname>
<given-names>K. P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Systematic investigation of the data set dependency of protein stability predictors</article-title>. <source>J. Chem. Inf. Model.</source> <volume>60</volume> (<issue>10</issue>), <fpage>4772</fpage>&#x2013;<lpage>4784</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.0c00591</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Caswell</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Gunning</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Owens</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Ellard</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>C. F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Assessing the clinical utility of protein structural analysis in genomic variant classification: Experiences from a diagnostic laboratory</article-title>. <source>Genome Med.</source> <volume>14</volume> (<issue>1</issue>), <fpage>77</fpage>. <pub-id pub-id-type="doi">10.1186/s13073-022-01082-2</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chetty</surname>
<given-names>P. S.</given-names>
</name>
<name>
<surname>Mayne</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lund-katz</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Englander</surname>
<given-names>S. W.</given-names>
</name>
<name>
<surname>Phillips</surname>
<given-names>M. C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Helical structure, stability, and dynamics in human apolipoprotein e3 and e4 by hydrogen exchange and mass spectrometry</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>114</volume> (<issue>5</issue>), <fpage>968</fpage>&#x2013;<lpage>973</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1617523114</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Davydov</surname>
<given-names>E. V.</given-names>
</name>
<name>
<surname>Goode</surname>
<given-names>D. L.</given-names>
</name>
<name>
<surname>Sirota</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cooper</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Sidow</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Batzoglou</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Identifying a high fraction of the human genome to be under selective constraint using gerp&#x2b;&#x2b;</article-title>. <source>PLoS Comput. Biol.</source> <volume>6</volume> (<issue>12</issue>), <fpage>e1001025</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1001025</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dolai</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cherakara</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Garai</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Apolipoprotein e4 exhibits intermediates with domain interaction</article-title>. <source>Biochimica Biophysica Acta (BBA) - Proteins Proteomics</source> <volume>1868</volume> (<issue>12</issue>), <fpage>140535</fpage>. <pub-id pub-id-type="doi">10.1016/j.bbapap.2020.140535</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Holtzman</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Herz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bu</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Apolipoprotein E and apolipoprotein E receptors: Normal biology and roles in alzheimer disease</article-title>. <source>Cold Spring Harb. Perspect. Med.</source> <volume>2</volume> (<issue>3</issue>), <fpage>a006312</fpage>. <pub-id pub-id-type="doi">10.1101/cshperspect.a006312</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Husain</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Laurent</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Plourde</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>APOE and alzheimer&#x27;s disease: From lipid transport to physiopathology and therapeutics</article-title>. <source>Front. Neurosci.</source> <volume>15</volume>, <fpage>630502</fpage>. <pub-id pub-id-type="doi">10.3389/fnins.2021.630502</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jarjanazi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Savas</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pabalan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Dennis</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Ozcelik</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Biological implications of snps in signal peptide domains of human proteins</article-title>. <source>Proteins Struct. Funct. Bioinforma.</source> <volume>70</volume> (<issue>2</issue>), <fpage>394</fpage>&#x2013;<lpage>403</lpage>. <pub-id pub-id-type="doi">10.1002/prot.21548</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jumper</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Evans</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pritzel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Green</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Figurnov</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ronneberger</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Highly accurate protein structure prediction with alphafold</article-title>. <source>Nature</source> <volume>596</volume> (<issue>7873</issue>), <fpage>583</fpage>&#x2013;<lpage>589</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-021-03819-2</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kamboh</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Sanghera</surname>
<given-names>D. K.</given-names>
</name>
<name>
<surname>Ferrell</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Dekosky</surname>
<given-names>S. T.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>APOE&#x2a;4-associated alzheimer&#x27;s disease risk is modified by &#x3b1;1&#x2013;antichymotrypsin polymorphism</article-title>. <source>Nat. Genet.</source> <volume>10</volume> (<issue>4</issue>), <fpage>486</fpage>&#x2013;<lpage>488</lpage>. <pub-id pub-id-type="doi">10.1038/ng0895-486</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khanna</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Hanna</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sternberg</surname>
<given-names>M. J. E.</given-names>
</name>
<name>
<surname>David</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Missense3D-DB web catalogue: An atom-based analysis and repository of 4M human protein-coding genetic variants</article-title>. <source>Hum. Genet.</source> <volume>140</volume> (<issue>5</issue>), <fpage>805</fpage>&#x2013;<lpage>812</lpage>. <pub-id pub-id-type="doi">10.1007/s00439-020-02246-z</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Laimer</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hofer</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Fritz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wegenkittl</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lackner</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Maestro - multi agent stability prediction upon point mutations</article-title>. <source>BMC Bioinforma.</source> <volume>16</volume> (<issue>1</issue>), <fpage>116</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-015-0548-6</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Landrum</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Benson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Chao</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chitipiralla</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>ClinVar: Public archive of interpretations of clinically relevant variants</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume> (<issue>D1</issue>), <fpage>D862</fpage>&#x2013;<lpage>D868</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv1222</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lek</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Karczewski</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>Minikel</surname>
<given-names>E. V.</given-names>
</name>
<name>
<surname>Samocha</surname>
<given-names>K. E.</given-names>
</name>
<name>
<surname>Banks</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Fennell</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Analysis of protein-coding genetic variation in 60,706 humans</article-title>. <source>Nature</source> <volume>536</volume> (<issue>7616</issue>), <fpage>285</fpage>&#x2013;<lpage>291</lpage>. <pub-id pub-id-type="doi">10.1038/nature19057</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lewandowski</surname>
<given-names>C. T.</given-names>
</name>
<name>
<surname>Maldonado weng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ladu</surname>
<given-names>M. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Alzheimer&#x27;s disease pathology in APOE transgenic mouse models: The who, what, when, where, why, and how</article-title>. <source>Neurobiol. Dis.</source> <volume>139</volume>, <fpage>104811</fpage>. <pub-id pub-id-type="doi">10.1016/j.nbd.2020.104811</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Akin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hie</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Evolutionary-scale prediction of atomic level protein structure with a language model</article-title>. <source>bioRxiv</source>. <pub-id pub-id-type="doi">10.1101/2022.07.20.500902</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Jian</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Boerwinkle</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>DbNSFP: A lightweight database of human nonsynonymous snps and their functional predictions</article-title>. <source>Hum. Mutat.</source> <volume>32</volume> (<issue>8</issue>), <fpage>894</fpage>&#x2013;<lpage>899</lpage>. <pub-id pub-id-type="doi">10.1002/humu.21517</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mou</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tu</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>DbNSFP v4: A comprehensive database of transcript-specific functional predictions and annotations for human nonsynonymous and splice-site snvs</article-title>. <source>Genome Med.</source> <volume>12</volume> (<issue>1</issue>), <fpage>103</fpage>. <pub-id pub-id-type="doi">10.1186/s13073-020-00803-9</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>J.-T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.-F.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>P.-R.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>C.-C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>APOE genotype and neuroimaging markers of alzheimer&#x27;s disease: Systematic review and meta-analysis</article-title>. <source>J. Neurology, Neurosurg. Psychiatry</source> <volume>86</volume> (<issue>2</issue>), <fpage>127</fpage>&#x2013;<lpage>134</lpage>. <pub-id pub-id-type="doi">10.1136/jnnp-2014-307719</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mariani</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Biasini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Barbato</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Schwede</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Lddt: A local superposition-free score for comparing protein structures and models using distance difference tests</article-title>. <source>Bioinformatics</source> <volume>29</volume> (<issue>21</issue>), <fpage>2722</fpage>&#x2013;<lpage>2728</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btt473</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martens</surname>
<given-names>Y. A.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.-C.</given-names>
</name>
<name>
<surname>Kanekiyo</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Goate</surname>
<given-names>A. M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>ApoE cascade hypothesis in the pathogenesis of Alzheimer&#x27;s disease and related dementias</article-title>. <source>Neuron</source> <volume>110</volume> (<issue>8</issue>), <fpage>1304</fpage>&#x2013;<lpage>1317</lpage>. <pub-id pub-id-type="doi">10.1016/j.neuron.2022.03.004</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mirdita</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sch&#xfc;tze</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Moriwaki</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Heo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ovchinnikov</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Steinegger</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>ColabFold: Making protein folding accessible to all</article-title>. <source>Nat. Methods</source> <volume>19</volume> (<issue>6</issue>), <fpage>679</fpage>&#x2013;<lpage>682</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-022-01488-1</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Montanucci</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Capriotti</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Frank</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ben-Tal</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Fariselli</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>DDGun: An untrained method for the prediction of protein stability changes upon single and multiple point variations</article-title>. <source>BMC Bioinforma.</source> <volume>20</volume> (<issue>S14</issue>), <fpage>335</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-019-2923-1</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pak</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Markhieva</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Novikova</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Petrov</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Vorobyev</surname>
<given-names>I. S.</given-names>
</name>
<name>
<surname>Maksimova</surname>
<given-names>E. S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Using AlphaFold to predict the impact of single mutations on protein stability and function</article-title>. <source>bioRxiv</source>. <pub-id pub-id-type="doi">10.1101/2021.09.19.460937</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>T. B.</given-names>
</name>
<name>
<surname>Ascher</surname>
<given-names>D. B.</given-names>
</name>
<name>
<surname>Pires</surname>
<given-names>D. E. V.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Systematic evaluation of computational tools to predict the effects of mutations on protein stability in the absence of experimental structures</article-title>. <source>Briefings Bioinforma.</source> <volume>23</volume> (<issue>2</issue>), <fpage>bbac025</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac025</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Richards</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Aziz</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Bale</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bick</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gastier-Foster</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Standards and guidelines for the interpretation of sequence variants: A joint consensus recommendation of the American College of Medical Genetics and Genomics and the Association for Molecular Pathology</article-title>. <source>Genet. Med.</source> <volume>17</volume> (<issue>5</issue>), <fpage>405</fpage>&#x2013;<lpage>424</lpage>. <pub-id pub-id-type="doi">10.1038/gim.2015.30</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rodrigues</surname>
<given-names>C. H. M.</given-names>
</name>
<name>
<surname>Pires</surname>
<given-names>D. E. V.</given-names>
</name>
<name>
<surname>Ascher</surname>
<given-names>D. B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>DynaMut2: Assessing changes in stability and flexibility upon single and multiple point missense mutations</article-title>. <source>Protein Sci.</source> <volume>30</volume> (<issue>1</issue>), <fpage>60</fpage>&#x2013;<lpage>69</lpage>. <pub-id pub-id-type="doi">10.1002/pro.3942</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schymkowitz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Borg</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Stricher</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Nys</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rousseau</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Serrano</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>The FoldX web server: An online force field</article-title>. <source>Nucleic Acids Res.</source> <volume>33</volume>, <fpage>W382</fpage>&#x2013;<lpage>W388</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gki387</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Steinegger</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>S&#xf6;ding</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets</article-title>. <source>Nat. Biotechnol.</source> <volume>35</volume> (<issue>11</issue>), <fpage>1026</fpage>&#x2013;<lpage>1028</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.3988</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilson</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wardell</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Weisgraber</surname>
<given-names>K. H.</given-names>
</name>
<name>
<surname>Mahley</surname>
<given-names>R. W.</given-names>
</name>
<name>
<surname>Agard</surname>
<given-names>D. A.</given-names>
</name>
</person-group> (<year>1991</year>). <article-title>Three-dimensional structure of the LDL receptor-binding domain of human apolipoprotein E</article-title>. <source>Science</source> <volume>252</volume> (<issue>5014</issue>), <fpage>1817</fpage>&#x2013;<lpage>1822</lpage>. <pub-id pub-id-type="doi">10.1126/science.2063194</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yamazaki</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Caulfield</surname>
<given-names>T. R.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.-C.</given-names>
</name>
<name>
<surname>Bu</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Apolipoprotein E and Alzheimer disease: Pathobiology and targeting strategies</article-title>. <source>Nat. Rev. Neurol.</source> <volume>15</volume> (<issue>9</issue>), <fpage>501</fpage>&#x2013;<lpage>518</lpage>. <pub-id pub-id-type="doi">10.1038/s41582-019-0228-7</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>