<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2024.1362131</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Integrative computational framework to decipher the functions of shell proteins in biomineralization</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Dong</surname>
<given-names>Wentao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2615091"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Xie</surname>
<given-names>Liping</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhang</surname>
<given-names>Rongqing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/570423"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Ministry of Education Key Laboratory of Protein Sciences, School of Life Sciences, Tsinghua University</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Biotechnology and Biomedicine, Yangtze Delta Region Institute of Tsinghua University</institution>, <addr-line>Jiaxing, Zhejiang</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Zhejiang Provincial Key Laboratory of Applied Enzymology, Yangtze Delta Region Institute of Tsinghua University</institution>, <addr-line>Jiaxing</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Taizhou Innovation Center, Yangtze Delta Region Institute of Tsinghua University</institution>, <addr-line>Taizhou, Zhejiang</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Chuang Liu, Hohai University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Zhi Liao, Zhejiang Ocean University, China</p>
<p>Felipe Aguilera, University of Concepcion, Chile</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Rongqing Zhang, <email xlink:href="mailto:rqzhang@mail.tsinghua.edu.cn">rqzhang@mail.tsinghua.edu.cn</email>; Liping Xie, <email xlink:href="mailto:lpxie@mail.tsinghua.edu.cn">lpxie@mail.tsinghua.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>19</day>
<month>07</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>11</volume>
<elocation-id>1362131</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>12</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>06</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Dong, Xie and Zhang</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Dong, Xie and Zhang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Mollusk shells contain biominerals with remarkable mechanical properties enabled by a small fraction of embedded organic matrix proteins. However, the specific molecular functions of most shell proteins have remained elusive. Traditional genomics and functional studies are extremely laborious to identify key components. To address this, we developed an in-silico pipeline integrating protein structure modeling, molecular dynamics simulations, and machine learning to elucidate the critical ion&#x2013;protein interactions governing shell formation. Using the pearl oyster <italic>Pinctada fucata</italic> as a test case, our framework successfully recapitulated known protein functions and predicted roles of uncharacterized proteins to guide future experiments. Moreover, the pipeline&#x2019;s modular design enables versatile applications for rapidly elucidating structure-function relationships in diverse biomineralization systems, complementing conventional wet-lab methods. Overall, this computational approach leverages automatic simulations and analytics to unlock molecular insights into shell protein&#x2013;ion dynamics, accelerating the discovery of key crystallization regulators for bioinspired materials design.</p>
</abstract>
<kwd-group>
<kwd>biomineralization</kwd>
<kwd>molecular dynamics simulation</kwd>
<kwd>machine learning</kwd>
<kwd>shell proteins</kwd>
<kwd>
<italic>Pinctada fucata</italic>
</kwd>
</kwd-group>
<counts>
<fig-count count="5"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="63"/>
<page-count count="11"/>
<word-count count="4899"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Marine Molecular Biology and Ecology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>Biominerals are biogenic composites of inorganic constituents and embedded organic matter. The latter endows biominerals with extraordinary mechanical properties despite its low content and is therefore of great interest to scientists seeking to design new materials following a similar strategy (<xref ref-type="bibr" rid="B20">Huang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B62">Zhao et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B63">Zhou et&#xa0;al., 2022</xref>). The organic components in biominerals, also referred to as the organic matrix, play a pivotal role in mineral deposition, including crystal nucleation, orientation, polymorph selection and morphology modification (<xref ref-type="bibr" rid="B2">Addadi and Weiner, 2014</xref>). Understanding how the organic matrix precisely affects crystallization is crucial to bioinspired material synthesis.</p>
<p>Mollusks are masters in producing plentiful biominerals with various microstructures, textures, and shapes. Their functions include protection, feeding, buoyancy, mating and vision, making them a great reservoir for bioinspired materials (<xref ref-type="bibr" rid="B35">Lowenstam and Weiner, 1989</xref>). For example, the shiny nacreous layer in some bivalves, gastropods and cephalopods, namely the mother of pearl, has been extensively studied both on the fundamental basis of its fracture-resistance and architectural assembly, and on the biomimetic applications of nacre-like materials (<xref ref-type="bibr" rid="B16">Finnemore et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B8">Cartwright, 2016</xref>). Despite considerable progress in making nacre-like structures with improved mechanical properties, natural nacre is far from being completely replicated, especially regarding stiffness and toughness, which may be due to the unique crystallization process in nacre.</p>
<p>The formation of shell structure in mollusks is regulated by the shell matrix, which contains polysaccharides, proteins, and lipids (<xref ref-type="bibr" rid="B39">Marin et&#xa0;al., 2012</xref>). Shell proteins are the key components controlling shell formation, as revealed by intensive studies in the <italic>Pinctada</italic> genus (<xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B27">Kintsu et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B40">Mariom et&#xa0;al., 2019</xref>). In the past three decades, hundreds of shell proteins have been identified by biochemical methods or by omics tools (<xref ref-type="bibr" rid="B34">Liu and Zhang, 2021</xref>). Yet their exact roles in controlling CaCO<sub>3</sub> crystallization are not clear, except for a few members. This may be ascribed to the time- and effort-consuming procedures of functional characterization of shell proteins, as well as the relatively limited methods available to explore protein function. Basically, the function of a shell protein is revealed by its amino acid sequence and its performance in <italic>in vitro</italic> CaCO<sub>3</sub> crystallization, which may further be supported by the expression of the related gene either under normal conditions or under treatments of shell damage or RNAi knock-down. Such methods have been successful in characterizing the potential roles of shell proteins in crystallization or, in some particular cases, in immunity (<xref ref-type="bibr" rid="B4">Bahn et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B59">Yang et&#xa0;al., 2020</xref>).</p>
<p>However, the rapid expansion in the number of shell proteins identified by omics tools, especially proteomics, in recent years has challenged the traditional characterization methods, making it a really tough and nearly inaccessible task to reveal the roles of all the identified shell proteins (<xref ref-type="bibr" rid="B34">Liu and Zhang, 2021</xref>). Moreover, the traditional characterization methods cannot fully elucidate the molecular mechanism of the crystallization controlled by the shell proteins at the molecular or even atomic level. Molecular dynamics is a time-saving method for studying the movement patterns and interactions of proteins at the atomic level, and can be used to explore the intricate structure of protein molecules, the relationships between atoms, and how these relationships affect the physical properties and biological functions of proteins (<xref ref-type="bibr" rid="B13">Eastman et&#xa0;al., 2017</xref>). For example, by understanding the dynamic behavior of proteins, scientists can predict and design new drug molecules that can effectively interact with specific proteins, thereby achieving the goal of treating diseases (<xref ref-type="bibr" rid="B49">Sinha et&#xa0;al., 2022</xref>). Moreover, molecular dynamics simulations revealed that the cement protein MrCP20 from the barnacle <italic>Megabalanus rosa</italic> can sequester free Ca<sup>2+</sup> and CO<sub>3</sub>
<sup>2&#x2212;</sup> ions on its highly charged surface through disorder&#x2212;order interplay of the protein and ions, and thereby regulate calcite deposition in the barnacle base plate (<xref ref-type="bibr" rid="B18">Harini et&#xa0;al., 2019</xref>). To fill the gap between the biomineralization genome and the protein functions, here we present a systematic framework integrating a state-of-the-art protein folding algorithm, molecular dynamics (MD) simulations and machine learning analysis, in an attempt to describe the interactions between proteins and inorganic ions (calcium, bicarbonate, and magnesium).</p>
</sec>
<sec id="s2">
<title>Methods</title>
<sec id="s2_1">
<title>Sequence analysis</title>
<p>We firstly retrieved all the protein sequences of <italic>Pinctada fucata</italic> from Uniprot, with a total of 1116 entries (up to August of 2023). Out of all proteins, we did a literature search and gathered all the cases with experimental results for influence on calcite or aragonite crystallization (see <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref> for details). Also, we included some potentially functional proteins from a previous in-depth proteomic analysis of shell matrix proteins (<xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2015</xref>) to examine their functions with our protocol. Then we processed the sequences with two approaches: the MAFFT (<xref ref-type="bibr" rid="B26">Katoh et&#xa0;al., 2002</xref>) method to perform the multiple sequence alignment and obtain the distance matrix; and the large protein sequence language model ESM2b (<xref ref-type="bibr" rid="B32">Lin et&#xa0;al., 2023</xref>) to process the sequences and obtain the embedding vectors. Then, the distance matrix and the embedding vectors were subjected to t-SNE (<xref ref-type="bibr" rid="B53">Van der Maaten and Hinton, 2008</xref>) for visualization using the Scikit-learn package (<xref ref-type="bibr" rid="B47">Pedregosa et&#xa0;al., 2011</xref>).</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Proteins used in the case study and their roles in CaCO<sub>3</sub> precipitation.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Name</th>
<th valign="top" align="left">GenBank</th>
<th valign="top" align="left">Calcite growth condition</th>
<th valign="top" align="left">Aragonite growth condition</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">basic protein N23 (<xref ref-type="bibr" rid="B15">Fang et&#xa0;al., 2012</xref>)</td>
<td valign="top" align="left">AFJ19278.1</td>
<td valign="top" align="left">inhibit</td>
<td valign="top" align="left">promote</td>
</tr>
<tr>
<td valign="top" align="left">mantle protein N25 (<xref ref-type="bibr" rid="B58">Yang et&#xa0;al., 2019</xref>)</td>
<td valign="top" align="left">AKC03097.1</td>
<td valign="top" align="left">inhibit</td>
<td valign="top" align="left">inhibit</td>
</tr>
<tr>
<td valign="top" align="left">matrix protein Y2 (<xref ref-type="bibr" rid="B57">Yan et&#xa0;al., 2017</xref>)</td>
<td valign="top" align="left">ASM90391.1</td>
<td valign="top" align="left">inhibit</td>
<td valign="top" align="left">inhibit</td>
</tr>
<tr>
<td valign="top" align="left">PNU7 (<xref ref-type="bibr" rid="B60">Yi et&#xa0;al., 2022</xref>)</td>
<td valign="top" align="left">UYL04248.1</td>
<td valign="top" align="left">inhibit</td>
<td valign="top" align="left">inhibit</td>
</tr>
<tr>
<td valign="top" align="left">Prismalin-14 (<xref ref-type="bibr" rid="B24">Jackson et&#xa0;al., 2006</xref>)</td>
<td valign="top" align="left">BAD27406</td>
<td valign="top" align="left">inhibit</td>
<td valign="top" align="left">unknown</td>
</tr>
<tr>
<td valign="top" align="left">PNU9 (<xref ref-type="bibr" rid="B28">Kong et&#xa0;al., 2019</xref>)</td>
<td valign="top" align="left">QCX35555.1</td>
<td valign="top" align="left">no effect</td>
<td valign="top" align="left">inhibit</td>
</tr>
<tr>
<td valign="top" align="left">Aspein (<xref ref-type="bibr" rid="B23">Isowa et&#xa0;al., 2012</xref>)</td>
<td valign="top" align="left">BAD00044</td>
<td valign="top" align="left">promote</td>
<td valign="top" align="left">unknown</td>
</tr>
<tr>
<td valign="top" align="left">PfN44 (<xref ref-type="bibr" rid="B46">Pan et&#xa0;al., 2014</xref>)</td>
<td valign="top" align="left">AGG35567.1</td>
<td valign="top" align="left">promote</td>
<td valign="top" align="left">inhibit</td>
</tr>
<tr>
<td valign="top" align="left">PNU5 (<xref ref-type="bibr" rid="B48">Shuai et&#xa0;al., 2023</xref>)</td>
<td valign="top" align="left">AKV63171.1</td>
<td valign="top" align="left">promote</td>
<td valign="top" align="left">promote</td>
</tr>
<tr>
<td valign="top" align="left">lysine-rich matrix protein 7 (<xref ref-type="bibr" rid="B31">Liang et&#xa0;al., 2016</xref>)</td>
<td valign="top" align="left">AMC39955.1</td>
<td valign="top" align="left">unknown</td>
<td valign="top" align="left">inhibit</td>
</tr>
<tr>
<td valign="top" align="left">MSI31 (<xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2015</xref>)</td>
<td valign="top" align="left">BAL52321.1</td>
<td valign="top" align="left">unknown</td>
<td valign="top" align="left">unknown</td>
</tr>
<tr>
<td valign="top" align="left">MSI80 (<xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2015</xref>)</td>
<td valign="top" align="left">BAL45933</td>
<td valign="top" align="left">unknown</td>
<td valign="top" align="left">unknown</td>
</tr>
<tr>
<td valign="top" align="left">Nacrein (<xref ref-type="bibr" rid="B6">Blank et&#xa0;al., 2003</xref>)</td>
<td valign="top" align="left">BAA11940</td>
<td valign="top" align="left">unknown</td>
<td valign="top" align="left">inhibit</td>
</tr>
<tr>
<td valign="top" align="left">N-U6 (<xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2015</xref>)</td>
<td valign="top" align="left">AKV63162.1</td>
<td valign="top" align="left">unknown</td>
<td valign="top" align="left">unknown</td>
</tr>
<tr>
<td valign="top" align="left">PfT (<xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2015</xref>)</td>
<td valign="top" align="left">Unpublished</td>
<td valign="top" align="left">unknown</td>
<td valign="top" align="left">unknown</td>
</tr>
<tr>
<td valign="top" align="left">Prismin 1 (<xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2015</xref>)</td>
<td valign="top" align="left">BAF93505.1</td>
<td valign="top" align="left">unknown</td>
<td valign="top" align="left">unknown</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_2">
<title>Structure modeling and molecular dynamics simulations</title>
<p>All protein structures used for MD simulations were predicted using Uni-Fold (<xref ref-type="bibr" rid="B30">Li et&#xa0;al., 2022</xref>). To perform large scale MD simulations with various settings, we developed an automated pipeline named ProtIon based on OpenMM (<xref ref-type="bibr" rid="B13">Eastman et&#xa0;al., 2017</xref>), and the source code can be found here: (<ext-link ext-link-type="uri" xlink:href="https://github.com/Dongwentao96/ProtIon">https://github.com/Dongwentao96/ProtIon</ext-link>). The standard protocol is shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>: the input structure was firstly examined to repair the missing residues or atoms with PDBfixer (<ext-link ext-link-type="uri" xlink:href="https://github.com/openmm/pdbfixer">https://github.com/openmm/pdbfixer</ext-link>), while determining the protonation states depending on the pre-defined pH value. This procedure is particularly useful if experimental structures are used, which often contain missing residues or atoms. Then, the protein was put into a box with 1.0 nm buffer padding and the ions of choice were added at the pre-defined concentration [in our case 8 mM CaCl<sub>2</sub> and 16 mM NaHCO<sub>3</sub> for calcite and an additional 50 mM MgCl<sub>2</sub> for aragonite based on our previous study (<xref ref-type="bibr" rid="B60">Yi et&#xa0;al., 2022</xref>)], along with counter-ions to maintain the electro-neutrality of the systems. For simulations, the energy minimization was firstly performed, followed by equilibration under NVT ensemble for 100 ps and NPT (300 K, 1 bar) ensemble for 500 ps, with heavy atoms fixed using Langevin dynamics (<xref ref-type="bibr" rid="B61">Zhang et&#xa0;al., 2019</xref>) and Monte-Carlo barostat (<xref ref-type="bibr" rid="B10">Chow and Ferguson, 1995</xref>), Amber FF14SB (<xref ref-type="bibr" rid="B36">Maier et&#xa0;al., 2015</xref>) forcefield and TIP3P (<xref ref-type="bibr" rid="B25">Jorgensen et&#xa0;al., 1983</xref>) water model. Finally, production simulations were performed under NPT conditions (300 K, 1 bar). In our case, 100 ns simulations were performed for each case to capture the local distribution of ions. Upon simulation completion, the integrated script can automatically generate the density map of ion distribution throughout the simulation, as well as perform routine trajectory analysis. The package supports various solvents or small organic molecules, with GAFF2 (<xref ref-type="bibr" rid="B55">Wang et&#xa0;al., 2004</xref>) forcefield by default and parameters from Li et al.
(<xref ref-type="bibr" rid="B29">Li et&#xa0;al., 2015</xref>) for ions to automatically do the parameterization. The trajectory was processed with MDTraj (<xref ref-type="bibr" rid="B41">McGibbon et&#xa0;al., 2015</xref>) and MDAnalysis (<xref ref-type="bibr" rid="B42">Michaud-Agrawal et&#xa0;al., 2011</xref>), and all visualization was performed using VMD (<xref ref-type="bibr" rid="B21">Humphrey&#xa0;et&#xa0;al., 1996</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>ProtIon overall workflow. The input sequences can be processed with protein folding algorithm like Uni-Fold. Then, the structures were processed by PDBFixer, and the MD systems were built with ions and other molecules added and the whole system solvated. After equilibration and production runs, the trajectories were processed to display the ion density maps and subject to further analysis.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1362131-g001.tif"/>
</fig>
</sec>
<sec id="s2_3">
<title>Ion distribution calculation</title>
<p>To analyze the ion distribution over the simulations, we applied a 4 &#xc5; cut-off around each amino acid and counted the number of ions for each snapshot, as an upper bound considering the van der Waals radii of the ions studied. The counts were then divided by the total number of ions, and the average over all snapshots was calculated for further analysis and visualization. The Volmap module in VMD was used to perform the density calculation.</p>
</sec>
<sec id="s2_4">
<title>Machine learning analysis</title>
<p>We used the statistical domain features from the Python package TSFEL (<xref ref-type="bibr" rid="B5">Barandas et&#xa0;al., 2020</xref>) to systematically embed the residue-level ion enrichment capability for each system from the molecular dynamics simulation trajectories into feature vectors of the same length, with the domain set to statistical, fs set to 1, window_spliter set to False, overlap set to 0, and the rest as default values. The features include histogram, interquartile range, mean absolute deviation, median absolute deviation, root mean square, standard deviation, variance, ECDF percentile count and ECDF slope. Similar to sequence analysis, we used the t-SNE method with the Python package Scikit-learn for visualization using Matplotlib (<xref ref-type="bibr" rid="B22">Hunter, 2007</xref>) and Seaborn (<xref ref-type="bibr" rid="B56">Waskom, 2021</xref>) packages.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<title>Results</title>
<p>Proteins usually exert physiological functions through some specific side chains of the amino acid residues. Therefore, elucidating the ion interaction maps of shell proteins is essential to deciphering the molecular mechanisms of shell proteins on biomineralization. Our computational simulations aimed to capture this key biophysical process and predict protein functions from the perspectives of structure, dynamics and ion distributions. However, most shell proteins identified so far in mollusk lack known homologous proteins, because only a few mollusk species have relatively complete draft genomes. At the same time, most of them do not have the three-dimensional structure analyzed by experiment. Recent advancement in protein folding algorithm paved the way for structural and dynamic studies towards our goal, since most matrix proteins are poorly characterized, and with no resolved experimental structures. Among several state-of-the-art folding algorithms (<xref ref-type="bibr" rid="B3">Baek et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B30">Li et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B32">Lin et&#xa0;al., 2023</xref>), we used Uni-Fold (<xref ref-type="bibr" rid="B30">Li et&#xa0;al., 2022</xref>) to process our sequences on the online platform Hermite (<ext-link ext-link-type="uri" xlink:href="https://hermite.dp.tech">https://hermite.dp.tech</ext-link>). With the predicted structures we employed our ProtIon pipeline to perform MD simulations in calcite and aragonite crystallization solutions with corresponding ion concentrations (see Methods). Usually, the folded structures contain uncertainties, especially in the loop regions, but such deviation can be restored within the simulation process governed by the physical forcefield (<xref ref-type="bibr" rid="B54">Wang et&#xa0;al., 2022</xref>). 
Indeed, we observed the RMSD may change significantly but reached plateau within 100ns simulation time in almost all systems (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;1</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>2</bold>
</xref>). Subsequent analysis of the trajectories revealed distinct ion binding capabilities correlated with promotion or inhibition of crystal growth.</p>
<p>The results show that proteins known to promote calcite or aragonite growth exhibited localized enrichments of both Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup> ions in certain regions (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). In contrast, proteins inhibiting mineralization showed accumulation of single ion types or spatially distant enrichment regions. Other factors, such as the interactions between the proteins and the substrate or the geometric shape or size of the proteins, may also affect the specific functions. While most proteins of interest are rich in charged, especially acidic, amino acids in their sequences, it is only through the 3D structures, combined with the MD simulations, that we can examine whether the protein can gather both ions (Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup>) to the same regions.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Ionic density volmaps captured by molecular dynamics simulation. <bold>(A)</bold> Proteins that promote growth of calcite. <bold>(B)</bold> Protein PNU9, which has no effect on growth of calcite. <bold>(C)</bold> Proteins that inhibit growth of calcite. <bold>(D)</bold> Proteins that promote growth of aragonite. <bold>(E)</bold> Proteins that inhibit growth of aragonite. For N23 and PNU7, the C-terminal domains are highlighted due to their distinct and important role in aragonite formation, as mentioned in the main text.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1362131-g002.tif"/>
</fig>
<sec id="s3_1">
<title>MD simulations explain past functional studies</title>
<p>In the past few decades, the impact of some matrix proteins on CaCO<sub>3</sub> precipitation has been characterized (see <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>). We then examined these matrix proteins using molecular dynamics simulations. Notably, Aspein, PNU5, and PfN44 all displayed concurrent accumulations of large amounts of Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup> locally (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>), agreeing with their reported promoting role in the calcite growth (<xref ref-type="bibr" rid="B23">Isowa et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B46">Pan et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B48">Shuai et&#xa0;al., 2023</xref>). Moreover, PNU5 showed co-enrichment of Ca<sup>2+</sup>, Mg<sup>2+</sup>, and HCO<sub>3</sub>
<sup>-</sup> (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2D</bold>
</xref>), consistent with its reported aragonite crystallization promotion (<xref ref-type="bibr" rid="B48">Shuai et&#xa0;al., 2023</xref>). It is particularly interesting to see that when Mg<sup>2+</sup> ion was present, the binding affinity of PfN44 protein in the system altered significantly, due to the Mg<sup>2+</sup> ions binding to the central beta sheet and thus disrupting the overall scaffold (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2E</bold>
</xref>). This phenomenon is in alignment with previous study reporting that PfN44 can stabilize magnesium-calcite to inhibit the crystallization of aragonite, explaining its opposite functions in calcite and aragonite formation (<xref ref-type="bibr" rid="B46">Pan et&#xa0;al., 2014</xref>).</p>
<p>PfN23 is a basic shell matrix protein (SMP) identified from the <italic>P. fucata</italic> shell, which can specifically induce the crystallization of aragonite, and its positively charged C-terminus is supposed to be the key functional region (<xref ref-type="bibr" rid="B15">Fang et&#xa0;al., 2012</xref>). Consistent with this, we found that the C-terminus of basic protein N23 selectively accumulated Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup> only in the aragonite solution (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>). Compared to its aragonite conformation, N23 in the calcite solution without Mg<sup>2+</sup> exhibited larger structural changes and lost the C-terminal Ca<sup>2+</sup> enrichment, and instead accumulated HCO<sub>3</sub>
<sup>-</sup> in more regions. This change in anion binding might be involved in the previously reported promotion of calcite dissolution (<xref ref-type="bibr" rid="B15">Fang et&#xa0;al., 2012</xref>).</p>
<p>Similarly, mantle protein N25 also displayed localized Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup> enrichments, but with lower overall levels and more directional focus compared to the abovementioned proteins when subjected to calcite crystallization solution (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>). Previous evidence indicates that N25 can block growing sites for forming crystal layer, thus increasing the energy cost for deposition and decreasing growth rates of some crystal faces of CaCO<sub>3</sub> (<xref ref-type="bibr" rid="B58">Yang et&#xa0;al., 2019</xref>). Therefore, the observed ion binding regions of N25 may be involved in interaction with the crystal layer. Additionally, N25 and lysine-rich matrix protein 7 showed localized single ion enrichments in the aragonite solution, consistent with their reported inhibition of aragonite crystal growth (<xref ref-type="bibr" rid="B31">Liang et&#xa0;al., 2016</xref>).</p>
<p>PNU9 showed no significant ion accumulation in calcite crystallization solution (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>), agreeing with the functional study reported previously (<xref ref-type="bibr" rid="B28">Kong et&#xa0;al., 2019</xref>). In the Mg<sup>2+</sup>-containing aragonite solution, PNU9 exhibited only minor, dispersed enrichments of Mg<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup>. Similarly, matrix protein PfY2 displayed local accumulation of single ion types in calcite. Such discrete single ion binding likely underlies their inhibitory effect on crystal growth (<xref ref-type="bibr" rid="B57">Yan et&#xa0;al., 2017</xref>).</p>
<p>Likewise, nacrein in the aragonite forming condition showed the ability to locally enrich single ions in multiple separated regions (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2E</bold>
</xref>), consistent with its reported inhibition of aragonite crystal growth (<xref ref-type="bibr" rid="B6">Blank et&#xa0;al., 2003</xref>). Prismalin-14 was expressed only at the mantle edge and primarily present in the prismatic layer composed of columnar calcite surrounded by organic matrices (<xref ref-type="bibr" rid="B50">Suzuki et&#xa0;al., 2004</xref>). Our results show that Prismalin-14 has two minor HCO<sub>3</sub>
<sup>-</sup> enriching regions sharing a planar Ca<sup>2+</sup> site (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>). Another distinct region with high single Ca<sup>2+</sup> accumulation may modulate the inhibition of calcium carbonate crystallization.</p>
<p>In the calcite solution, PNU7 only showed Ca<sup>2+</sup> enrichment (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>), and is predicted to play an inhibition role. However, in the aragonite simulation, PNU7&#x2019;s C-terminus exhibited two sites with high Ca<sup>2+</sup>/HCO<sub>3</sub>
<sup>-</sup> accumulation and minor surrounding Mg<sup>2+</sup> enrichment (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2E</bold>
</xref>). It also displayed a region near the N-terminus with high HCO<sub>3</sub>
<sup>-</sup> accumulation. <italic>In vitro</italic> crystallization experiments showed that PNU7 can modify calcite morphology and stabilize large vaterite particles when Mg<sup>2+</sup> is absent, while at lower Mg<sup>2+</sup> concentration, large amounts of tiny crystals were formed (<xref ref-type="bibr" rid="B60">Yi et&#xa0;al., 2022</xref>). When the C-terminus of PNU7 was deleted, the growth of calcite and vaterite was inhibited. Our observed binding patterns of this protein agree with these reported functions.</p>
<p>In the aragonite solution, matrix protein Y2 exhibited localized co-enrichment of Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup>, along with minor co-enrichment of Ca<sup>2+</sup> and Mg<sup>2+</sup> (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2E</bold>
</xref>). Purified recombinant rPfY2 protein has been found to significantly suppress the CaCO<sub>3</sub> precipitation rate and participate in the crystal nucleation process (<xref ref-type="bibr" rid="B57">Yan et&#xa0;al., 2017</xref>). Additionally, the morphology of crystals was modified, and the transformation of amorphous calcium carbonate (ACC) to calcite or aragonite was inhibited. We hypothesize that the functional domains enriching Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup> can regulate CaCO<sub>3</sub> precipitation rate, representing a more specialized form of growth inhibition.</p>
</sec>
<sec id="s3_2">
<title>PCA analysis on MD simulation data well separated proteins of different functions</title>
<p>Comparing with previous studies on the shell matrix proteins, we found that proteins with different functions exhibit highly correlated results in molecular dynamics simulations, suggesting that molecular dynamics models can be used to deduce the potential functions of some uncharacterized shell proteins and to predict their possible effects on the growth of calcite or aragonite. Thus, we recorded the frequency of ion occurrences within proximity to each residue during the calcite and aragonite crystallization simulations as a characterization of the ion enrichment capability for each residue (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figures&#xa0;3</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>4</bold>
</xref>). We then used TSFEL to extract features as protein representations (See Methods). Additionally, we selected traditional distance matrices based on sequence alignment and embeddings from pre-trained protein language models as alternative protein representations.</p>
<p>Subsequently, we utilized t-SNE for unsupervised dimensionality reduction of the protein representations from the three methods. As seen in <xref ref-type="fig" rid="f3">
<bold>Figures&#xa0;3A, D</bold>
</xref>, the matrix proteins were grouped into two groups of promoting crystallization/inhibiting crystallization by using the multi-sequence comparison distance matrix as a parameter, and there was a certain degree of differentiation between the two types of proteins under aragonite crystallization condition, but it was not ideal in the calcite environment. When embedding vectors are used as parameters, the resulting groupings have a large overlap, both in the calcite and aragonite growth conditions (<xref ref-type="fig" rid="f3">
<bold>Figures&#xa0;3B, E</bold>
</xref>). When molecular dynamics simulations of locus ion enrichment were used to describe the function of proteins, the two protein groups were well separated, with very little overlap between proteins with promoting or inhibiting crystallization properties (<xref ref-type="fig" rid="f3">
<bold>Figures&#xa0;3C, F</bold>
</xref>). Therefore, the ion enrichment capability representations have a natural advantage for depicting proteins affecting the growth of calcite versus aragonite.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>t-SNE results of mineralization related proteins. The t-SNE dimensional reduction applied to (<bold>A, D</bold>) multisequence comparison distance matrix (<bold>B, E</bold>) esm2_t30_150M_UR50D embedding vectors (<bold>C, F</bold>) the molecular dynamics simulation trajectory extracted feature vectors, for calcite/aragonite respectively.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1362131-g003.tif"/>
</fig>
</sec>
<sec id="s3_3">
<title>ProtIon combined with machine learning to explore biomineralization proteins</title>
<p>Based on the molecular dynamics simulation features, we employed SVM (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>) to build a model to classify the known proteins and applied it to other proteins potentially related to biomineralization that were previously reported (<xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2015</xref>). Interestingly, PNU7, N23 and PfN44 are very close to the hyperplane, and these three have subtle and contradictory roles in calcite and aragonite, with both Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup> binding to a specific domain and/or being greatly affected by the presence of Mg<sup>2+</sup>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>SVM classification of shell proteins under calcite <bold>(A)</bold> and aragonite <bold>(B)</bold> growth conditions. For calcite, the promoting, inhibiting and no-effect proteins were separated by two hyperplanes shown as two lines. For aragonite, only the promoting and inhibiting proteins were separated by the hyperplane shown as a line, since no &#x2018;no effect&#x2019; proteins were reported. In both cases, other unknown proteins were projected to predict their potential role.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1362131-g004.tif"/>
</fig>
<p>According to the SVM predictions (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>) and ionic density maps, we speculated that MSI31, MSI80 and Nacrein (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>) may promote calcite growth. They exhibited multiple regions of co-enrichment of Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup> ions and relatively large size, which are consistent with the aforementioned cases of Aspein, PNU5 and PfN44 under calcite growth condition (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). As to Lysine-rich matrix protein 7 (KRMP7), it only showed enrichment of HCO<sub>3</sub>
<sup>-</sup> ions and is likely to inhibit the growth of calcite. PfT has three small regions of co-enrichment of Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup> (<xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5B</bold>
</xref>), as well as a significant Ca<sup>2+</sup> ion enrichment region, which is relatively similar to the simulation result of Prismalin-14, indicating potential functions of changing crystal morphology and inhibiting calcite growth.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Ion density maps of unknown potential proteins: <bold>(A)</bold> the potentially promoting proteins MSI31, Nacrein and MSI80 for calcite formation; <bold>(B)</bold> the possibly inhibiting proteins lysine-rich matrix protein 7 and PfT for calcite formation; <bold>(C)</bold> the potentially promoting proteins Aspein and MSI31 for aragonite formation and <bold>(D)</bold> the potentially inhibiting proteins MSI80 and PfT for aragonite formation.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-11-1362131-g005.tif"/>
</fig>
<p>In terms of the impact on aragonite crystals, Aspein and MSI31 may promote aragonite growth, as predicted by the SVM model. As shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5C</bold>
</xref>, they both have regions of co-enrichment of Ca<sup>2+</sup>, HCO<sub>3</sub>
<sup>-</sup> and Mg<sup>2+</sup>, similar to the simulation results of PNU5 and N23. Meanwhile, MSI80 and PfT do not have significant large co-enrichment regions, also consistent with the prediction.</p>
<p>However, it should be noted that the SVM model may have limited predictive potential for some proteins. For example, there are weak cases like N-U6 and Prismin-1 (see <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;5</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>) under calcite growth condition, both of which have some ion enrichment regions but were predicted to have no effect on calcite crystal growth, probably due to the relatively small protein size that prevents stable binding to the substrate. Also, under aragonite growth condition, N-U6, Prismalin-14 and Prismin-1 all have some regions of co-enrichment of Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup>, but no Mg<sup>2+</sup> was accumulated around these regions. Their predicted inhibition of aragonite growth may be ascribed to the relatively small size and ion enrichment region, to attachment to the newly formed crystal layer, or to a reduction in the precipitation rate of CaCO<sub>3</sub>, as reported in previous studies (<xref ref-type="bibr" rid="B58">Yang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B60">Yi et&#xa0;al., 2022</xref>).</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<title>Discussion</title>
<p>Mollusk shell formation is a complex biomineralization process precisely controlled by the organism to generate intricate composite materials with remarkable mechanical properties. The shell matrix comprises proteins, polysaccharides, and other macromolecules that regulate mineral deposition and crystal growth at multiple levels (<xref ref-type="bibr" rid="B37">Marin et&#xa0;al., 2007a</xref>, <xref ref-type="bibr" rid="B38">2007b</xref>, <xref ref-type="bibr" rid="B39">2012</xref>). At the heart of this process is the interaction between the organic matrix and the inorganic ions that make up the mineral phase. The availability and transport of ions, particularly calcium, carbonate, and magnesium, are critical determinants of shell construction (<xref ref-type="bibr" rid="B9">Cartwright and Checa, 2007</xref>). Calcium and bicarbonate ions are taken up from seawater by the mantle epithelium and transported to the calcification site via the extrapallial fluid (<xref ref-type="bibr" rid="B39">Marin et&#xa0;al., 2012</xref>). The organic matrix creates localized ion-rich environments to stimulate crystal nucleation. Matrix proteins selectively bind ions via acidic residues, acting as scaffolds for oriented crystal growth (<xref ref-type="bibr" rid="B14">Falini et&#xa0;al., 1996</xref>). Specific macromolecular conformations and ion binding motifs direct the polymorph selection between calcite and aragonite.</p>
<p>The precise spatial and temporal regulation of ion concentrations by the organic matrix is key to controlling crystallization kinetics and directing the assembly of composite shell microstructures (<xref ref-type="bibr" rid="B45">Nudelman and Sommerdijk, 2012</xref>). Tracking the dynamics of ion accumulation reveals mechanistic insights into how proteins influence crystal nucleation, orientation, phase, morphology and material properties. As Marin et&#xa0;al. discussed, the protein-ion interactions that direct crystallization occur at multiple length scales, from the nanoscale ion binding sites within individual proteins to the larger-scale accumulation patterns shaped by the 3D matrix scaffold (<xref ref-type="bibr" rid="B38">Marin et&#xa0;al., 2007b</xref>).</p>
<p>Although the full mechanism of biomineral formation is complicated (<xref ref-type="bibr" rid="B59">Yang et&#xa0;al., 2020</xref>), involving enzymatic processes, spatial patterning, and controlled ion accumulation, the general picture is that matrix proteins attach to a chitin-silk fibroin substrate and then concentrate Ca<sup>2+</sup> and HCO<sub>3</sub>
<sup>-</sup> from the environment to form shell layers. This requires the proteins to stably bind both ions while presenting a large enough surface for crystal nucleation. If the protein can only gather anion or cation, it will drive the crystallization kinetics towards dissociation. The results in the present study indicate that the interactions between the side chain of the shell proteins and the inorganic ions are related to the regulatory role of the proteins. Magnesium can directly affect the precipitation of calcite and aragonite (<xref ref-type="bibr" rid="B43">Morse et&#xa0;al., 1997</xref>). Our study showed that some shell proteins, especially those from aragonitic nacre layers, interacted with Mg<sup>2+</sup> differently, which may explain their control of crystal polymorph selection during shell formation. Previous studies have shown that proteins rich in aspartic acid are thought to bind calcium ions, and proteins containing the EF-hand domain can also bind calcium ions (<xref ref-type="bibr" rid="B19">Hattan et&#xa0;al., 2001</xref>; <xref ref-type="bibr" rid="B17">Gotliv et&#xa0;al., 2005</xref>; <xref ref-type="bibr" rid="B52">Takeuchi et&#xa0;al., 2008</xref>). The binding of calcium or magnesium ions in turn changes the structure of the matrix protein. In addition, Lia Addadi and Steve Weiner&#x2019;s group have shown that the regular arrangement of amino acid residues in the tandem repeats containing asparagine is well matched to the crystal lattice of calcium carbonate with a specific crystal polymorph, thus determining the CaCO<sub>3</sub> polymorph (<xref ref-type="bibr" rid="B1">Addadi and Weiner, 1985</xref>). Consistently, tandem repeats containing DDRK can significantly affect the deposition of calcium carbonate (<xref ref-type="bibr" rid="B51">Tah et&#xa0;al., 2024</xref>).</p>
<p>We developed the automated simulation and analysis tool ProtIon, based on the Python package OpenMM, to study protein-ion interactions, and performed consistency validation on some important proteins in the field of biomineralization, demonstrating a high consistency between simulation results and experimental results. Based on the simulation analysis results, we can identify residues in proteins that have a greater impact on mineralization. The rich structural information provided by our method can be further used for understanding the detailed mechanism of shell formation, as well as for protein design and domain fusion to breed better varieties.</p>
<p>The machine learning model we developed here to predict the function of matrix proteins has only been tested theoretically, which warrants more experimental evidence in the future to support it. For example, the ion enrichment of a specific amino acid sequence can be analyzed by truncating the protein to obtain mutants. Alternatively, we can also directly express the domain with ion binding affinity to verify its role. If the validity of our model can be verified experimentally, its application can be expanded. In the present study, we only used one shell protein in a single simulation, but under natural biomineralization conditions, many proteins interact and regulate CaCO<sub>3</sub> precipitation synergistically. In addition, many shell proteins have disordered regions, which usually appear as loops and coiled coils. Previous studies have shown that these disordered regions are vital in shell mineralization (<xref ref-type="bibr" rid="B44">Ndao et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B7">Brown et&#xa0;al., 2014</xref>), which is further supported by the results presented here revealing the interaction between the disordered regions and the inorganic ions. However, we excluded some shell proteins that have a large proportion of disordered regions and no 3D structures (e.g. MSI60 and the shematrin family) from this study due to technical difficulties in the subsequent MD simulation. Because our machine learning model depends on the 3D structure of the target proteins, this shortcoming of our method prevents its application to all shell proteins. Moreover, the training dataset of our model was relatively limited due to the fact that only a small part of the identified molluscan shell proteins has been characterized via <italic>in vitro</italic> crystallization experiments. Excluding shell proteins other than <italic>P. fucata</italic> shell proteins also limited the training data.
Therefore, more efforts are warranted to establish a routine method that is applicable to most shell proteins from various molluscan genera.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<title>Conclusion</title>
<p>Building on previous proteomic studies (<xref ref-type="bibr" rid="B11">Connors et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B12">Du et&#xa0;al., 2017</xref>), our framework can perform simulations under various conditions and reveal the ensemble average ion distributions over the protein surfaces, which in turn can be used to determine the particular role of the protein in shell formation. In particular, we tested some well-studied shell proteins and a few potentially functional proteins, for the cases of calcite and aragonite formation. Our simulations successfully captured the ion accumulation and correlated well with the experiments. In addition, we built a machine learning model to further identify potential proteins with functions in promoting or inhibiting mineral formation. Moreover, our molecular dynamics simulation module is based on OpenMM (<xref ref-type="bibr" rid="B13">Eastman et&#xa0;al., 2017</xref>), an open source and versatile simulation framework, allowing for convenient adjustment according to specific cases.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref>. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>WD: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. LX: Funding acquisition, Supervision, Writing &#x2013; review &amp; editing. RZ: Funding acquisition, Resources, Supervision, Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by the National Natural Science Foundation of China Grant 32072951.</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmars.2024.1362131/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmars.2024.1362131/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.pdf" id="SM1" mimetype="application/pdf"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Addadi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Weiner</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>1985</year>). <article-title>Interactions between acidic proteins and crystals: stereochemical requirements in biomineralization</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>82</volume>, <fpage>4110</fpage>&#x2013;<lpage>4114</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Addadi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Weiner</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Biomineralization: mineral formation by organisms</article-title>. <source>Physica. Scripta.</source> <volume>89</volume>, <fpage>098003</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/0031-8949/89/9/098003</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baek</surname> <given-names>M.</given-names>
</name>
<name>
<surname>DiMaio</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Anishchenko</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Dauparas</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Ovchinnikov</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>G. R.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Accurate prediction of protein structures and interactions using a three-track neural network</article-title>. <source>Science</source> <volume>373</volume>, <fpage>871</fpage>&#x2013;<lpage>876</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/science.abj8754</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bahn</surname> <given-names>S. Y.</given-names>
</name>
<name>
<surname>Jo</surname> <given-names>B. H.</given-names>
</name>
<name>
<surname>Choi</surname> <given-names>Y. S.</given-names>
</name>
<name>
<surname>Cha</surname> <given-names>H. J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Control of nacre biomineralization by Pif80 in pearl oyster</article-title>. <source>Sci. Adv.</source> <volume>3</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/sciadv.1700765</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barandas</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Folgado</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Fernandes</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Santos</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Abreu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Bota</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>TSFEL: time series feature extraction library</article-title>. <source>SoftwareX</source> <volume>11</volume>, <fpage>100456</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.softx.2020.100456</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Blank</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Arnoldi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Khoshnavaz</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Treccani</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Kuntz</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Mann</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2003</year>). <article-title>The nacre protein perlucin nucleates growth of calcium carbonate crystals</article-title>. <source>J. Microsc.</source> <volume>212</volume>, <fpage>280</fpage>&#x2013;<lpage>291</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1365-2818.2003.01263.x</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brown</surname> <given-names>A. H.</given-names>
</name>
<name>
<surname>Rodger</surname> <given-names>P. M.</given-names>
</name>
<name>
<surname>Evans</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Walsh</surname> <given-names>T. R.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Equilibrium conformational ensemble of the intrinsically disordered peptide n16n: linking subdomain structures and function in nacre</article-title>. <source>Biomacromolecules</source> <volume>15</volume>, <fpage>4467</fpage>&#x2013;<lpage>4479</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cartwright</surname> <given-names>J. H. E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Directed self-assembly, genomic assembly complexity and the formation of biological structure, or, what are the genes for nacre</article-title>? <source>Philos. Trans. R. Soc. A.: Mathematical. Phys. Eng. Sci.</source> <volume>374</volume>, <fpage>20150449</fpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cartwright</surname> <given-names>J. H. E.</given-names>
</name>
<name>
<surname>Checa</surname> <given-names>A. G.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>The dynamics of nacre self-assembly</article-title>. <source>J. R. Soc. Interface</source> <volume>4</volume>, <fpage>491</fpage>&#x2013;<lpage>504</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1098/rsif.2006.0188</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chow</surname> <given-names>K.-H.</given-names>
</name>
<name>
<surname>Ferguson</surname> <given-names>D. M.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Isothermal-isobaric molecular dynamics simulations with Monte Carlo volume sampling</article-title>. <source>Comput. Phys. Commun.</source> <volume>91</volume>, <fpage>283</fpage>&#x2013;<lpage>289</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/0010-4655(95)00059-O</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Connors</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Ehrlich</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Hog</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Godeffroy</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Araya</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kallai</surname> <given-names>I.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Three-dimensional structure of the shell plate assembly of the chiton Tonicella marmorea and its biomechanical consequences</article-title>. <source>J. Struc. Biol.</source> <volume>177</volume>, <fpage>314</fpage>&#x2013;<lpage>328</lpage>.</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Jiao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>R.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>The pearl oyster Pinctada fucata martensii genome and multi-omic analyses provide insights into biomineralization</article-title>. <source>GigaScience</source> <volume>6</volume>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eastman</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Swails</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chodera</surname> <given-names>J. D.</given-names>
</name>
<name>
<surname>McGibbon</surname> <given-names>R. T.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Beauchamp</surname> <given-names>K. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>OpenMM 7: Rapid development of high performance algorithms for molecular dynamics</article-title>. <source>PloS Comput. Biol.</source> <volume>13</volume>, <fpage>e1005659</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pcbi.1005659</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Falini</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Albeck</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Weiner</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Addadi</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>Control of aragonite or calcite polymorphism by mollusk shell macromolecules</article-title>. <source>Science</source> <volume>271</volume>, <fpage>67</fpage>&#x2013;<lpage>69</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/science.271.5245.67</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Novel basic protein, PfN23, functions as key macromolecule during nacre formation</article-title>. <source>J. Biol. Chem.</source> <volume>287</volume>, <fpage>15776</fpage>&#x2013;<lpage>15785</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1074/jbc.M112.341594</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Finnemore</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Cunha</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Shean</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Vignolini</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Guldin</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Oyen</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Biomimetic layer-by-layer assembly of artificial nacre</article-title>. <source>Nat. Commun.</source> <volume>3</volume>, <fpage>966</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ncomms1970</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gotliv</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Kessler</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Sumerel</surname> <given-names>J. L.</given-names>
</name>
<name>
<surname>Morse</surname> <given-names>D. E.</given-names>
</name>
<name>
<surname>Tuross</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Addadi</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2005</year>). <article-title>Asprich: A Novel Aspartic Acid-Rich Protein Family from the Prismatic Shell Matrix of the Bivalve Atrina rigida</article-title>. <source>ChemBioChem.</source> <volume>6</volume>, <fpage>304</fpage>&#x2013;<lpage>314</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohanram</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Verma</surname> <given-names>C. S.</given-names>
</name>
<name>
<surname>Pervushin</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Miserez</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Three-dimensional structure of Megabalanus rosa Cement Protein 20 revealed by multi-dimensional NMR and molecular dynamics simulations</article-title>. <source>Philosoph. Transact. R. Soc. B: Biol. Sci.</source> <volume>374</volume>.</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hattan</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Laue</surname> <given-names>T. M.</given-names>
</name>
<name>
<surname>Chasteen</surname> <given-names>N. D.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Purification and Characterization of a Novel Calcium-binding Protein from the Extrapallial Fluid of the Mollusc, Mytilus edulis</article-title>. <source>J. Biol. Chem.</source> <volume>276</volume>, <fpage>4461</fpage>&#x2013;<lpage>4468</lpage>.</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Restrepo</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Jung</surname> <given-names>J. Y.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>F. Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z. Q.</given-names>
</name>
<name>
<surname>Ritchie</surname> <given-names>R. O.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Multiscale toughening mechanisms in biological materials and bioinspired designs</article-title>. <source>Adv. Mater.</source> <volume>31</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/adma.201901561</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Humphrey</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Dalke</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Schulten</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>VMD: Visual molecular dynamics</article-title>. <source>J. Mol. Graphics</source> <volume>14</volume>, <fpage>33</fpage>&#x2013;<lpage>38</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/0263-7855(96)00018-5</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hunter</surname> <given-names>J. D.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Matplotlib: A 2D graphics environment</article-title>. <source>Comput. Sci. Eng.</source> <volume>9</volume>, <fpage>90</fpage>&#x2013;<lpage>95</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/MCSE.2007.55</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Isowa</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Sarashina</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Setiamarga</surname> <given-names>D. H.</given-names>
</name>
<name>
<surname>Endo</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>A comparative study of the shell matrix protein aspein in pterioid bivalves</article-title>. <source>J. Mol. Evol.</source> <volume>75</volume>, <fpage>11</fpage>&#x2013;<lpage>18</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00239-012-9514-3</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jackson</surname> <given-names>D. J.</given-names>
</name>
<name>
<surname>McDougall</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Green</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Simpson</surname> <given-names>F.</given-names>
</name>
<name>
<surname>W&#xf6;rheide</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Degnan</surname> <given-names>B. M.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>A rapidly evolving secretome builds and patterns a sea shell</article-title>. <source>BMC Biol.</source> <volume>4</volume>, <fpage>40</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1741-7007-4-40</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jorgensen</surname> <given-names>W. L.</given-names>
</name>
<name>
<surname>Chandrasekhar</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Madura</surname> <given-names>J. D.</given-names>
</name>
<name>
<surname>Impey</surname> <given-names>R. W.</given-names>
</name>
<name>
<surname>Klein</surname> <given-names>M. L.</given-names>
</name>
</person-group> (<year>1983</year>). <article-title>Comparison of simple potential functions for simulating liquid water</article-title>. <source>J. Chem. Phys.</source> <volume>79</volume>, <fpage>926</fpage>&#x2013;<lpage>935</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1063/1.445869</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Katoh</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Misawa</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Kuma</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Miyata</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform</article-title>. <source>Nucleic Acids Res.</source> <volume>30</volume>, <fpage>3059</fpage>&#x2013;<lpage>3066</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkf436</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kintsu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Okumura</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Negishi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ifuku</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kogure</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sakuda</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Crystal defects induced by chitin and chitinolytic enzymes in the prismatic layer of Pinctada fucata</article-title>. <source>Biochem. Biophys. Res. Commun.</source> <volume>489</volume>, <fpage>89</fpage>&#x2013;<lpage>95</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.bbrc.2017.05.088</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kong</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>A novel basic matrix protein of Pinctada fucata, PNU9, functions as inhibitor during crystallization of aragonite</article-title>. <source>CrystEngComm</source> <volume>21</volume>, <fpage>1250</fpage>&#x2013;<lpage>1261</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1039/C8CE02194E</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>L. F.</given-names>
</name>
<name>
<surname>Merz</surname> <given-names>K. M.</given-names>
<suffix>Jr.</suffix>
</name>
</person-group> (<year>2015</year>). <article-title>Systematic parameterization of monovalent ions employing the nonbonded model</article-title>. <source>J. Chem. Theory Comput.</source> <volume>11</volume>, <fpage>1645</fpage>&#x2013;<lpage>1657</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/ct500918t</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Bi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ke</surname> <given-names>G.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Uni-Fold: an open-source platform for developing protein folding models beyond AlphaFold</article-title>. <source>bioRxiv</source>. 2022.08.04.502811. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/2022.08.04.502811</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>C. Q.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>G. C.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>Identification and characterization of the lysine-rich matrix protein family in Pinctada fucata: indicative of roles in shell formation</article-title>. <source>Mar. Biotechnol. (NY).</source> <volume>18</volume>, <fpage>645</fpage>&#x2013;<lpage>658</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10126-016-9724-6</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Akin</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Rao</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Hie</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>W.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Evolutionary-scale prediction of atomic-level protein structure with a language model</article-title>. <source>Science</source> <volume>379</volume>, <fpage>1123</fpage>&#x2013;<lpage>1130</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/science.ade2574</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>S. G.</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>J. J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y. J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>T. P.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>L. P.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). <article-title>In-depth proteomic analysis of shell matrix proteins of Pinctada fucata</article-title>. <source>Sci. Rep.</source> <volume>5</volume>, <fpage>17269</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/srep17269</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Biomineral proteomics: A tool for multiple disciplinary studies</article-title>. <source>J. Proteomics</source> <volume>238</volume>, <fpage>104171</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jprot.2021.104171</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lowenstam</surname> <given-names>H. A.</given-names>
</name>
<name>
<surname>Weiner</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>1989</year>). <source>On biomineralization</source> (<publisher-loc>New York and London</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.1093/oso/9780195049770.001.0001</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maier</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Martinez</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Kasavajhala</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wickstrom</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Hauser</surname> <given-names>K. E.</given-names>
</name>
<name>
<surname>Simmerling</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>ff14SB: improving the accuracy of protein side chain and backbone parameters from ff99SB</article-title>. <source>J. Chem. Theory Comput.</source> <volume>11</volume>, <fpage>3696</fpage>&#x2013;<lpage>3713</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.jctc.5b00255</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marin</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Luquet</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Marie</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Medakovic</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2007</year>a). <article-title>Molluscan shell proteins: primary structure, origin, and evolution</article-title>. <source>Curr. Top. Dev. Biol.</source> <volume>80</volume>, <fpage>209</fpage>&#x2013;<lpage>276</lpage> (<publisher-name>Academic Press</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0070-2153(07)80006-8</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marin</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Pokroy</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Luquet</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Layrolle</surname> <given-names>P.</given-names>
</name>
<name>
<surname>De Groot</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2007</year>b). <article-title>Protein mapping of calcium carbonate biominerals by immunogold</article-title>. <source>Biomaterials</source> <volume>28</volume>, <fpage>2368</fpage>&#x2013;<lpage>2377</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.biomaterials.2007.01.029</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marin</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Le Roy</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Marie</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>The formation and mineralization of mollusk shell</article-title>. <source>Front. Biosci. (Schol. Ed).</source> <volume>4</volume>, <fpage>1099</fpage>&#x2013;<lpage>1125</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2741/s321</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mariom</surname>
</name>
<name>
<surname>Take</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Igarashi</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yoshitake</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Asakawa</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Maeyama</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Gene expression profiles at different stages for formation of pearl sac and pearl in the pearl oyster Pinctada fucata</article-title>. <source>BMC Genomics</source> <volume>20</volume>, <fpage>240</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12864-019-5579-3</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McGibbon</surname> <given-names>R. T.</given-names>
</name>
<name>
<surname>Beauchamp</surname> <given-names>K. A.</given-names>
</name>
<name>
<surname>Harrigan</surname> <given-names>M. P.</given-names>
</name>
<name>
<surname>Klein</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Swails</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Hern&#xe1;ndez</surname> <given-names>C. X.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). <article-title>MDTraj: A modern open library for the analysis of molecular dynamics trajectories</article-title>. <source>Biophys. J.</source> <volume>109</volume>, <fpage>1528</fpage>&#x2013;<lpage>1532</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.bpj.2015.08.015</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Michaud-Agrawal</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Denning</surname> <given-names>E. J.</given-names>
</name>
<name>
<surname>Woolf</surname> <given-names>T. B.</given-names>
</name>
<name>
<surname>Beckstein</surname> <given-names>O.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>MDAnalysis: a toolkit for the analysis of molecular dynamics simulations</article-title>. <source>J. Comput. Chem.</source> <volume>32</volume>, <fpage>2319</fpage>&#x2013;<lpage>2327</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/jcc.21787</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Morse</surname> <given-names>J. W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Tsio</surname> <given-names>M. Y.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Influences of temperature and Mg:Ca ratio on CaCO<sub>3</sub> precipitates from seawater</article-title>. <source>Geology</source> <volume>25</volume>, <fpage>85</fpage>&#x2013;<lpage>87</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1130/0091-7613(1997)025&lt;0085:IOTAMC&gt;2.3.CO;2</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ndao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Keene</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Amos</surname> <given-names>F. F.</given-names>
</name>
<name>
<surname>Rewari</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Ponce</surname> <given-names>C. B.</given-names>
</name>
<name>
<surname>Estroff</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2010</year>). <article-title>Intrinsically disordered mollusk shell prismatic protein that modulates calcium carbonate crystal growth</article-title>. <source>Biomacromolecules</source> <volume>11</volume>, <fpage>2539</fpage>&#x2013;<lpage>2544</lpage>.</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nudelman</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Sommerdijk</surname> <given-names>N. A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Biomineralization as an inspiration for materials chemistry</article-title>. <source>Angew. Chem. Int. Ed. Engl.</source> <volume>51</volume>, <fpage>6582</fpage>&#x2013;<lpage>6596</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/anie.201106715</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pan</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Fang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2014</year>). <article-title>A novel acidic matrix protein, PfN44, stabilizes magnesium calcite to inhibit the crystallization of aragonite</article-title>. <source>J. Biol. Chem.</source> <volume>289</volume>, <fpage>2776</fpage>&#x2013;<lpage>2787</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1074/jbc.M113.504027</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedregosa</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Varoquaux</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Gramfort</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Michel</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Thirion</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Grisel</surname> <given-names>O.</given-names>
</name>
<etal/>
</person-group>. (<year>2011</year>). <article-title>Scikit-learn: machine learning in python</article-title>. <source>J. Mach. Learn. Res.</source> <volume>12</volume>, <fpage>2825</fpage>&#x2013;<lpage>2830</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5555/1953048.2078195</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shuai</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A novel matrix protein PNU5 facilitates the transformation from amorphous calcium carbonate to calcite and aragonite</article-title>. <source>Int. J. Biol. Macromol.</source> <volume>224</volume>, <fpage>754</fpage>&#x2013;<lpage>765</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ijbiomac.2022.10.163</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sinha</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Tam</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Applications of molecular dynamics simulation in protein study</article-title>. <source>Membranes</source> <volume>12</volume>.</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Suzuki</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Murayama</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Inoue</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ozaki</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Tohse</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Kogure</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2004</year>). <article-title>Characterization of Prismalin-14, a novel matrix protein from the prismatic layer of the Japanese pearl oyster (Pinctada fucata)</article-title>. <source>Biochem. J.</source> <volume>382</volume>, <fpage>205</fpage>&#x2013;<lpage>213</lpage>.</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tah</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Upcher</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Berman</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Goldstein-Goren</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A simple periodic peptide derived from Pinctada fucata Pif80 protein induces aragonite nucleation in magnesium absence</article-title>. <source>ChemRxiv</source>.</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Takeuchi</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sarashina</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Iijima</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Endo</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>
<italic>In vitro</italic> regulation of CaCO<sub>3</sub> crystal polymorphism by the highly acidic molluscan shell protein Aspein</article-title>. <source>FEBS Lett.</source> <volume>582</volume>, <fpage>591</fpage>&#x2013;<lpage>596</lpage>.</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van der Maaten</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Hinton</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Visualizing data using t-SNE</article-title>. <source>J. Mach. Learn. Res.</source> <volume>9</volume>.</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>E</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Efficient sampling of high-dimensional free energy landscapes using adaptive reinforced dynamics</article-title>. <source>Nat. Comput. Sci.</source> <volume>2</volume>, <fpage>20</fpage>&#x2013;<lpage>29</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s43588-021-00173-1</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wolf</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Caldwell</surname> <given-names>J. W.</given-names>
</name>
<name>
<surname>Kollman</surname> <given-names>P. A.</given-names>
</name>
<name>
<surname>Case</surname> <given-names>D. A.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Development and testing of a general amber force field</article-title>. <source>J. Comput. Chem.</source> <volume>25</volume>, <fpage>1157</fpage>&#x2013;<lpage>1174</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/jcc.20035</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waskom</surname> <given-names>M. L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Seaborn: statistical data visualization</article-title>. <source>J. Open Source Software.</source> <volume>6</volume>, <fpage>3021</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21105/joss.03021</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>G.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>A novel matrix protein, PfY2, functions as a crucial macromolecule during shell formation</article-title>. <source>Sci. Rep.</source> <volume>7</volume>, <fpage>6021</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-017-06375-w</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>A basic protein, N25, from a mollusk modifies calcium carbonate morphology and shell biomineralization</article-title>. <source>J. Biol. Chem.</source> <volume>294</volume>, <fpage>8371</fpage>&#x2013;<lpage>8383</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1074/jbc.RA118.007338</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>S. G.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Z. M.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>Y. H.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>A novel matrix protein PfX regulates shell ultrastructure by binding to specific calcium carbonate crystal faces</article-title>. <source>Int. J. Biol. Macromol.</source> <volume>156</volume>, <fpage>302</fpage>&#x2013;<lpage>313</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ijbiomac.2020.04.016</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zou</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A novel bifunctional protein PNU7 in CaCO<sub>3</sub> polymorph formation: Vaterite stabilization and surface energy minimization</article-title>. <source>Int. J. Biol. Macromol.</source> <volume>222</volume>, <fpage>2796</fpage>&#x2013;<lpage>2807</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ijbiomac.2022.10.059</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Tuckerman</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Unified efficient thermostat scheme for the canonical ensemble with holonomic or isokinetic constraints via molecular dynamics</article-title>. <source>J. Phys. Chem. A.</source> <volume>123</volume>, <fpage>6056</fpage>&#x2013;<lpage>6079</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.jpca.9b02771</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>C. Q.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>P. C.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>J. J.</given-names>
</name>
<name>
<surname>Qi</surname> <given-names>S. H.</given-names>
</name>
<name>
<surname>Yamauchi</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>R. R.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Layered nanocomposites by shear-flow-induced alignment of nanosheets</article-title>. <source>Nature</source> <volume>580</volume>, <fpage>210</fpage>&#x2013;<lpage>215</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41586-020-2161-8</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>T. X.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>C. Q.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y. H.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>H. S.</given-names>
</name>
<name>
<surname>Nie</surname> <given-names>Z. D.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Large-area ultrastrong and stiff layered MXene nanocomposites by shear-flow-induced alignment of nanosheets</article-title>. <source>ACS Nano.</source> <volume>16</volume>, <fpage>12013</fpage>&#x2013;<lpage>12023</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acsnano.2c02062</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>