<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="review-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Hematol.</journal-id>
<journal-title>Frontiers in Hematology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Hematol.</abbrev-journal-title>
<issn pub-type="epub">2813-3935</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frhem.2024.1305741</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Hematology</subject>
<subj-group>
<subject>Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title><italic>De novo</italic> drug design through artificial intelligence: an introduction</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Crucitti</surname><given-names>Davide</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2350855"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>P&#xe9;rez M&#xed;guez</surname><given-names>Carlos</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>D&#xed;az Arias</surname><given-names>Jos&#xe9; &#xc1;ngel</given-names>
</name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/663620"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fernandez Prada</surname><given-names>Diego Beltr&#xe1;n</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2532036"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Mosquera Orgueira</surname><given-names>Adri&#xe1;n</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/126761"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Group of Computational Genomics and Hematology (GreCoXen), Health Research Institute of Santiago de Compostela (IDIS)</institution>, <addr-line>Santiago de Compostela</addr-line>, <country>Spain</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Pharmacology, University of Santiago de Compostela</institution>, <addr-line>Santiago de Compostela</addr-line>, <country>Spain</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Hematology, Complejo Hospitalario Universitario de Santiago</institution>, <addr-line>Santiago de Compostela</addr-line>, <country>Spain</country></aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Shuaiying Cui, Boston University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Dragos Horvath, UMR7140 Chimie de la Mati&#xe8;re Complexe, France</p>
<p>Nikolaos Sousos, University of Oxford, United Kingdom</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Davide Crucitti, <email xlink:href="mailto:davide.crucitti@sergas.es">davide.crucitti@sergas.es</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>01</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>3</volume>
<elocation-id>1305741</elocation-id>
<history>
<date date-type="received">
<day>02</day>
<month>10</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>01</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Crucitti, P&#xe9;rez M&#xed;guez, D&#xed;az Arias, Fernandez Prada and Mosquera Orgueira</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Crucitti, P&#xe9;rez M&#xed;guez, D&#xed;az Arias, Fernandez Prada and Mosquera Orgueira</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Developing new drugs is a complex and formidable challenge, intensified by rapidly evolving global health needs. <italic>De novo</italic> drug design is a promising strategy to accelerate and refine this process. The recent introduction of Generative Artificial Intelligence (AI) algorithms has brought new attention to the field and catalyzed a paradigm shift, allowing rapid and semi-automatic design and optimization of drug-like molecules. This review explores the impact of <italic>de novo</italic> drug design, highlighting both traditional methodologies and the recently introduced generative algorithms, as well as the promising development of Active Learning (AL). It places special emphasis on their application in oncological drug development, where the need for novel therapeutic agents is urgent. The potential integration of these AI technologies with established computational and experimental methods heralds a new era in the rapid development of innovative drugs. Despite the promising developments and notable successes, these technologies are not without limitations, which require careful consideration and further advancement. This review, intended for professionals across related disciplines, provides a comprehensive introduction to AI-driven <italic>de novo</italic> drug design of small organic molecules. It aims to offer a clear understanding of the current state and future prospects of these innovative techniques in drug discovery.</p>
</abstract>
<kwd-group>
<kwd><italic>de novo</italic>
</kwd>
<kwd>drug design</kwd>
<kwd>artificial intelligence</kwd>
<kwd>cheminformatics</kwd>
<kwd>machine learning</kwd>
</kwd-group>
<counts>
<fig-count count="3"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="128"/>
<page-count count="14"/>
<word-count count="7273"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Blood Cancer</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>Modern medicine&#x2019;s progress is tightly linked to drug design innovations. Developing new drugs is critical for global health, but the process is costly and time-consuming, often taking several years and costing over a billion dollars (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). Improvements in the drug design phase can greatly reduce these expenses and make it more accessible.</p>
<p>Traditionally, drug design was mainly experimental. However, in the 1990s, computational techniques like <italic>de novo</italic> molecular design began to emerge (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B3">3</xref>). <italic>De novo</italic> design is a set of computational methods that can be used to design a compound without using a previously known one as a starting point. This approach aims to automate the creation of new chemical structures tailored to specific molecular characteristics. It leverages knowledge from existing, effective molecules to design novel ones with unique structural features. In drug design, <italic>de novo</italic> methods focus on generating molecules with unique drug-like qualities, differentiating them from current treatments. This approach presents a multifaceted challenge as it seeks to design molecules that satisfy various pharmaceutical criteria, including biological activity, target selectivity, and optimal ADMET (Absorption, Distribution, Metabolism, Excretion, and Toxicity) profiles. Despite their innovative nature, these methods faced challenges such as the difficulty of synthesizing the molecules they proposed and the need for specialized computational skills, limiting their broad application in the field of drug discovery (<xref ref-type="bibr" rid="B4">4</xref>).</p>
<p>A significant change occurred in 2017 with the introduction of generative AI in <italic>de novo</italic> design (<xref ref-type="bibr" rid="B5">5</xref>, <xref ref-type="bibr" rid="B6">6</xref>). This breakthrough revitalized interest in the field and inspired solutions to previous limitations. These AI algorithms, utilizing vast data on bioactivity, toxicity, and protein structures, have streamlined the process of identifying and refining drug candidates. The emergence of various models, each employing distinct AI architectures, has led to a rapid proliferation of innovative methods. This expansion has significantly enhanced the role of these technologies in the realm of drug discovery (<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>Pharmaceutical companies are now integrating these algorithms into their drug design processes, often in collaboration with AI firms (<xref ref-type="bibr" rid="B9">9</xref>). Drugs developed using these methods, like DSP-1181, EXS21546, and DSP-0038, have reached clinical trials (<xref ref-type="bibr" rid="B10">10</xref>), demonstrating the effectiveness of generative algorithms in producing viable therapeutic agents. While these compounds primarily target well-researched biological targets and do not innovate structural or binding properties, they validate the utility of generative algorithms in producing effective therapeutic agents.</p>
<p>Generative drug design has demonstrated its maturity and effectiveness. These methods stand to substantially change the pharmaceutical industry, even without considering additional advancements. They enable the rapid generation of alternative treatments once a new drug target is discovered, thereby significantly enhancing their role in both drug discovery and development. They also automate aspects of drug optimization, streamlining the development process.</p>
<p>Despite the rapid progress of these methods, it&#x2019;s vital to critically assess their limitations. Most of their validation is based on computational benchmarks, which may not fully address real-world challenges (<xref ref-type="bibr" rid="B11">11</xref>). Integrating these algorithms into the design-make-test-analyze (DMTA) cycle is essential for a comprehensive efficacy assessment. This integration not only validates their effectiveness but also identifies areas for improvement, fostering wider adoption and refined performance.</p>
<p>This review, aimed at professionals in related fields, introduces various <italic>de novo</italic> algorithms and their role in drug discovery. It covers the advantages and challenges of AI in drug discovery, detailing how these technologies integrate with existing processes. Topics include computational strategies for molecule selection, experimental validation of compounds&#x2019; efficacy, and limitations such as synthetic feasibility and chemical space exploration (<xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>). Evaluation metrics for these algorithms are also discussed. Finally, the review includes case studies highlighting the impact of these technologies in real-world drug development, specifically in oncology and hematology.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Drug design process.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frhem-03-1305741-g001.tif"/>
</fig>
</sec>
<sec id="s2">
<title>Drug development campaigns</title>
<p>Drug development is a complex, iterative, and multi-stage process that aims to convert a biological hypothesis into a clinically effective drug (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B12">12</xref>). This iterative phase utilizes a feedback-oriented approach. It begins with designing molecules based on existing data, followed by computational analysis for selection and ranking. The chosen candidates are then synthesized and tested experimentally, including evaluations on isolated targets, cell cultures, and animal models.</p>
<sec id="s2_1">
<title>Phases of drug discovery</title>
<sec id="s2_1_1">
<title>Target identification and validation</title>
<p>The early stage of drug discovery involves identifying and validating biological targets that can be influenced by potential drugs to change disease progression. These targets vary, including proteins, mutated genes, specific nucleic acid sequences, or components of pathogens (<xref ref-type="bibr" rid="B13">13</xref>). Validation is crucial to confirm the therapeutic relevance of these targets. To this end, an array of molecular techniques is used for gene and protein-level verification, while cell-based assays further substantiate the biological significance of the targets in a more complex environment. The introduction of AI in this phase has accelerated the processing of large datasets, speeding up target validation.</p>
</sec>
<sec id="s2_1_2">
<title>Hit discovery</title>
<p>After validating drug targets, the subsequent phase in drug discovery is identifying &#x201c;hits,&#x201d; molecules that affect the activity of the target. This phase emphasizes exploring a diverse range of molecular structures, as some may show initial activity but prove challenging to optimize. High-throughput screening (HTS) has been a traditional method, testing vast compound libraries against the target. However, HTS is costly and inefficient, often screening over 50,000 compounds with low hit rates and high costs per compound (<xref ref-type="bibr" rid="B4">4</xref>). The rise of <italic>de novo</italic> drug design offers a more efficient alternative for hit identification, reducing the reliance on extensive experimental validation. These AI algorithms utilize existing biomedical data to optimize the experimental design and more precisely explore the vast chemical space, which contains an estimated <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>33</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>63</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> drug-like molecules (<xref ref-type="bibr" rid="B14">14</xref>). A notable limitation of conventional algorithms is their tendency to explore wide chemical regions without considering the synthesizability of proposed molecules.</p>
</sec>
<sec id="s2_1_3">
<title>Hit-to-lead</title>
<p>After identifying potential drug molecules, the next stage in drug discovery is transforming these &#x201c;hits&#x201d; into &#x201c;lead&#x201d; compounds, aiming to improve their effectiveness, specificity, and overall suitability as drugs. The main strategy involves altering the core structure, or scaffold, of the molecule and adjusting its substituents. This approach provides insights into how specific changes affect interactions with the target; moreover, synthesizing a series of molecules with a common scaffold is easier than synthesizing a set of entirely diverse compounds.</p>
<p>AI has become a valuable tool in this phase. AI algorithms do more than just analyze the effects of different changes; they actively suggest modifications to enhance the potential of lead candidates. AI can incrementally improve a known hit molecule or, based on its training, generate entirely new molecules likely to be effective in the same category.</p>
</sec>
<sec id="s2_1_4">
<title>Lead optimization</title>
<p>The final stage involves refining lead compounds to ensure their effectiveness, safety, and compliance with clinical standards. This stage requires meticulous structural modifications, aiming to prepare drug candidates for clinical trials. AI significantly aids this process by forecasting a drug&#x2019;s behavior and proposing structural changes, thus accelerating the development of promising candidates.</p>
<p>Refining drug candidates is a collaborative effort combining a chemist&#x2019;s expertise, laboratory testing, computational modeling, and AI insights. The ability of AI to predict key aspects, such as the interaction of a drug with its target and its physical and chemical properties, is particularly valuable. <italic>De novo</italic> AI provides chemists with structural modification suggestions, facilitating exploration and expediting the optimization process. Consequently, AI enables chemists to work more effectively, accelerating the journey towards viable drug candidates.</p>
</sec>
</sec>
<sec id="s2_2">
<title>Drug design strategies</title>
<p>The landscape of drug discovery is a complex and dynamic field that necessitates strategies for identifying and optimizing molecules. Ranging from the broad exploration of chemical space to nuanced structural modifications, these strategies present unique advantages and challenges. These are exemplified in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>. This section outlines key strategies commonly used in the field.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Drug design strategies. <bold>(A)</bold> &#x2014; Scaffold hopping, <bold>(B)</bold> &#x2014; Scaffold decoration, <bold>(C)</bold> &#x2014; Fragment Linking, <bold>(D)</bold> &#x2014; Fragment Merging, <bold>(E)</bold> &#x2014; Fragment Growing.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frhem-03-1305741-g002.tif"/>
</fig>
<sec id="s2_2_1">
<title>Chemical space sampling</title>
<p>Chemical space sampling involves selecting a diverse subset of molecules from a vast array, using computational tools to maximize discovery potential while balancing the synthesizability of these molecules (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B15">15</xref>&#x2013;<xref ref-type="bibr" rid="B19">19</xref>).</p>
</sec>
<sec id="s2_2_2">
<title>Scaffold hopping</title>
<p>Scaffold hopping is used in hit-to-lead and optimization phases to find novel lead molecules by modifying the core structure of a molecule, the scaffold, while maintaining similar activity. This strategy has evolved with AI, enabling the generation of alternative scaffolds and suggesting optimal substituents for target interactions (<xref ref-type="bibr" rid="B20">20</xref>&#x2013;<xref ref-type="bibr" rid="B23">23</xref>). Although this process results in structurally distinct compounds, their binding mechanisms to the target typically remain similar, positioning scaffold hopping more as an optimization step than an exploratory one.</p>
</sec>
<sec id="s2_2_3">
<title>Scaffold decoration</title>
<p>In scaffold decoration, functional groups are added to the scaffold to enhance interaction with the target, thereby improving efficacy or selectivity. This approach is pivotal in the later stages of drug development, allowing for the fine-tuning of molecular properties and evaluating the influence of each substituent over the activity (<xref ref-type="bibr" rid="B24">24</xref>, <xref ref-type="bibr" rid="B25">25</xref>).</p>
</sec>
<sec id="s2_2_4">
<title>Fragment-based design strategies</title>
<p>Distinct from other methods, this approach begins with small molecules, named fragments, which have shown target-binding affinity (<xref ref-type="bibr" rid="B26">26</xref>&#x2013;<xref ref-type="bibr" rid="B29">29</xref>). These fragments are then elaborated upon using various techniques. Fragment Linking involves joining two or more fragments that interact with different sites on the target to create a single molecule with improved binding affinity. Fragment Merging combines overlapping fragments into a unified molecular structure. Lastly, Fragment Growing expands the fragment by adding parts to enhance its binding affinity to the target.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<title><italic>De novo</italic> design methods</title>
<p>The fusion of AI with computational chemistry has led to the development of <italic>de novo</italic> design methods, which have found applications in areas ranging from drug discovery to materials science (<xref ref-type="bibr" rid="B30">30</xref>). These innovative techniques are focused on synthesizing a variety of new, diverse, and efficient chemical compounds. In contrast to traditional AI applications that mainly predict molecular properties or interactions with biological targets, these new methods are centered on the creation of novel molecules. Their objective is to explore and map extensive areas of the chemical space, going beyond the confines of existing drug-like molecule databases.</p>
<p><italic>De novo</italic> molecular design primarily employs two strategies. The first, &#x201c;Holistic Generation,&#x201d; involves creating an entire molecule from scratch, which is particularly useful for initial discovery phases and for exploring chemical spaces broadly. The second strategy, &#x201c;Iterative Generation,&#x201d; builds molecules in a step-by-step manner, which is more suited for refining molecules or tailoring them for specific purposes. A selection of <italic>de novo</italic> design methodologies which have been developed over time can be found in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>A list of relevant <italic>de novo</italic> methodologies which have been developed over time.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="bottom" align="left">Model</th>
<th valign="bottom" align="left">Year</th>
<th valign="bottom" align="left">Molecular Representation</th>
<th valign="bottom" align="left">Mode</th>
<th valign="bottom" align="left">Chemical Space</th>
<th valign="bottom" align="left">Domain</th>
<th valign="bottom" align="left">Applicable Strategies</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">GALILEO (<xref ref-type="bibr" rid="B31">31</xref>)</td>
<td valign="top" align="left">2023</td>
<td valign="top" align="left">2D Graph</td>
<td valign="top" align="left">Iterative addition</td>
<td valign="top" align="left">Synthon</td>
<td valign="top" align="left">Hit Discovery, Hit to Lead</td>
<td valign="top" align="left">Scaffold hopping, Scaffold decoration</td>
</tr>
<tr>
<td valign="top" align="left">RJT-RL (<xref ref-type="bibr" rid="B32">32</xref>)</td>
<td valign="top" align="left">2022</td>
<td valign="top" align="left">2D Graph</td>
<td valign="top" align="left">Iterative addition</td>
<td valign="top" align="left">Trained</td>
<td valign="top" align="left">Hit to lead, Lead optimization</td>
<td valign="top" align="left">Scaffold hopping, Scaffold decoration</td>
</tr>
<tr>
<td valign="top" align="left">MolPal (<xref ref-type="bibr" rid="B33">33</xref>)</td>
<td valign="top" align="left">2021</td>
<td valign="top" align="left">Fingerprints</td>
<td valign="top" align="left">Active Learning</td>
<td valign="top" align="left">Predefined Set</td>
<td valign="top" align="left">Hit Discovery</td>
<td valign="top" align="left">Chemical Space Sampling</td>
</tr>
<tr>
<td valign="top" align="left">STONED (<xref ref-type="bibr" rid="B34">34</xref>)</td>
<td valign="top" align="left">2021</td>
<td valign="top" align="left">SELFIES</td>
<td valign="top" align="left">Random Mutation</td>
<td valign="top" align="left">Valence Rules</td>
<td valign="top" align="left">Hit discovery, Lead optimization</td>
<td valign="top" align="left">Scaffold decoration</td>
</tr>
<tr>
<td valign="top" align="left">CReM (<xref ref-type="bibr" rid="B35">35</xref>)</td>
<td valign="top" align="left">2020</td>
<td valign="top" align="left">SMILES</td>
<td valign="top" align="left">Iterative addition</td>
<td valign="top" align="left">Matched Molecular Pairs</td>
<td valign="top" align="left">Hit to Lead, Lead optimization</td>
<td valign="top" align="left">Scaffold hopping, Scaffold decoration</td>
</tr>
<tr>
<td valign="top" align="left">DeLinker (<xref ref-type="bibr" rid="B36">36</xref>)</td>
<td valign="top" align="left">2020</td>
<td valign="top" align="left">3D Graph</td>
<td valign="top" align="left">Fragment linking</td>
<td valign="top" align="left">Trained</td>
<td valign="top" align="left">Scaffold Hopping</td>
<td valign="top" align="left">Fragment based</td>
</tr>
<tr>
<td valign="top" align="left">GENTRL (<xref ref-type="bibr" rid="B37">37</xref>)</td>
<td valign="top" align="left">2019</td>
<td valign="top" align="left">2D/3D Graph</td>
<td valign="top" align="left">Latent Space Exploration</td>
<td valign="top" align="left">Trained</td>
<td valign="top" align="left">Hit discovery, Lead optimization</td>
<td valign="top" align="left">Chemical Space Sampling</td>
</tr>
<tr>
<td valign="top" align="left">JT-VAE (<xref ref-type="bibr" rid="B38">38</xref>)</td>
<td valign="top" align="left">2018</td>
<td valign="top" align="left">2D Graph</td>
<td valign="top" align="left">Iterative addition</td>
<td valign="top" align="left">Trained</td>
<td valign="top" align="left">Hit Discovery, Hit to Lead</td>
<td valign="top" align="left">Scaffold decoration, Scaffold hopping</td>
</tr>
<tr>
<td valign="top" align="left">CoG (<xref ref-type="bibr" rid="B39">39</xref>)</td>
<td valign="top" align="left">2004</td>
<td valign="top" align="left">2D Graph</td>
<td valign="top" align="left">Iterative addition</td>
<td valign="top" align="left">Fragment Space</td>
<td valign="top" align="left">Hit Discovery, Lead optimization</td>
<td valign="top" align="left">Scaffold decoration, Scaffold hopping</td>
</tr>
<tr>
<td valign="top" align="left">SYNOPSIS (<xref ref-type="bibr" rid="B40">40</xref>, <xref ref-type="bibr" rid="B41">41</xref>)</td>
<td valign="top" align="left">2003</td>
<td valign="top" align="left">2D Graph</td>
<td valign="top" align="left">Iterative addition</td>
<td valign="top" align="left">Synthon</td>
<td valign="top" align="left">Hit Discovery, Lead optimization</td>
<td valign="top" align="left">Scaffold decoration, Scaffold hopping</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s3_1">
<title>Molecular representations and encodings</title>
<p>Molecules, being tangible entities that cannot be directly manipulated through computations, need methods to represent their properties and characteristics. These range from quantum mechanics-based electron distribution models to basic chemical formulas. The representations are communicated through various encodings, such as the structural formula, which visually outlines molecular topologies. Encodings for molecular structure generation typically view atoms as indivisible units connected by covalent bonds.</p>
<p>String-based encodings like the Wiswesser Line Notation (<xref ref-type="bibr" rid="B42">42</xref>) and SMILES (<xref ref-type="bibr" rid="B43">43</xref>) represent molecular topology of organic molecules through character sequences. However, limitations in SMILES led to the development of DeepSMILES (<xref ref-type="bibr" rid="B44">44</xref>), ensuring syntactic correctness, and SELFIES (<xref ref-type="bibr" rid="B45">45</xref>), ensuring both syntactical validity and chemical space adherence.</p>
<p>Graph-based encodings depict molecules as nodes (atoms) linked by edges (bonds) (<xref ref-type="bibr" rid="B46">46</xref>), with 2D versions showing connections and 3D versions adding spatial information. While 3D encodings offer more detail, they also add complexity in algorithm training.</p>
<p>Feature-based encodings use molecular descriptors (<xref ref-type="bibr" rid="B47">47</xref>) or fingerprints (<xref ref-type="bibr" rid="B48">48</xref>) to detail molecules based on properties or experimental data, useful for comparison and property prediction but less effective for structure creation.</p>
<p>AI can define encodings by converting molecular structures into a continuous numerical space, which can be decoded back (<xref ref-type="bibr" rid="B8">8</xref>). Trained on extensive molecular datasets, these learned encodings capture essential features and allow for the exploration of new molecular structures. Challenges in learning encodings include accurately representing chemical space and ensuring relevance to drug development, as molecules close in latent space might have divergent biological and chemical properties.</p>
</sec>
<sec id="s3_2">
<title>Overview of available databases</title>
<p>Access to extensive data is crucial to train algorithms. Key to this is the availability of public databases. Prominent among these are ChEMBL (<xref ref-type="bibr" rid="B49">49</xref>), PubChem (<xref ref-type="bibr" rid="B50">50</xref>), BindingDB (<xref ref-type="bibr" rid="B51">51</xref>), and DrugBank (<xref ref-type="bibr" rid="B52">52</xref>), which offer essential bioactivity data for drug discovery. ChemSpider (<xref ref-type="bibr" rid="B53">53</xref>) aggregates information on molecular properties and available compounds. CompTox (<xref ref-type="bibr" rid="B54">54</xref>) is a resource for toxicity and environmental hazards. GDB17 (<xref ref-type="bibr" rid="B55">55</xref>) lists organic compounds up to 17 atoms, while QM9 (<xref ref-type="bibr" rid="B56">56</xref>) provides a subset with quantum mechanically determined molecular conformations. ZINC (<xref ref-type="bibr" rid="B57">57</xref>) catalogs commercially available compounds, and Enamine&#x2019;s REAL database lists synthesizable molecules.</p>
<p>The Protein Data Bank (PDB) (<xref ref-type="bibr" rid="B58">58</xref>) provides structural details of proteins and nucleic acids, crucial for understanding molecular interactions. UniProt (<xref ref-type="bibr" rid="B59">59</xref>) offers protein sequences and functional annotations, which help target selection and validation. The Therapeutic Target Database (TTD) (<xref ref-type="bibr" rid="B60">60</xref>) focuses on well-studied therapeutic targets.</p>
</sec>
<sec id="s3_3">
<title>Trained generative techniques</title>
<p>The field of <italic>de novo</italic> drug design has been significantly advanced by generative algorithms, each offering unique advantages and applications. A training dataset of molecular structures is essential for these techniques. These use a set of known molecules to train a generative algorithm which is then used to generate novel molecular structures. Key AI methods are summarized below, with a detailed discussion available in the comprehensive review by Martinelli (<xref ref-type="bibr" rid="B7">7</xref>).</p>
<list list-type="bullet">
<list-item>
<p>Recurrent Neural Networks (RNNs): Predominant in generative drug design, RNNs excel in generating new molecular structures by identifying patterns in training data. Notable developments include the first model by Olivecrona et&#xa0;al. (<xref ref-type="bibr" rid="B61">61</xref>) and subsequent advancements like DrugEx (<xref ref-type="bibr" rid="B62">62</xref>, <xref ref-type="bibr" rid="B63">63</xref>), which emphasizes multi-objective optimization, including toxicity considerations.</p>
</list-item>
<list-item>
<p>Latent Space Exploration: Latent space exploration methodologies are another common generative drug design technique. Among these, VAEs have garnered considerable attention, and were the first architecture specifically applied to drug generation in 2017 (<xref ref-type="bibr" rid="B5">5</xref>). Subsequent studies have reinforced their utility and applicability (<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B8">8</xref>). VAEs function through a dual neural network architecture, comprised of an encoder that maps molecular structures into a latent space, and a decoder that reverses this transformation. Models have been designed to accommodate both 2D and 3D molecular representations (<xref ref-type="bibr" rid="B64">64</xref>). The PASITHEA model introduced &#x201c;deep dreaming&#x201d; to molecular design (<xref ref-type="bibr" rid="B65">65</xref>). This approach employs a deep neural network, trained to predict specific molecular properties. By reversing the network and inputting desired property values, one can generate molecules optimized for those properties, an improvement over the more abstract latent space of VAEs.</p>
</list-item>
<list-item>
<p>Generative Adversarial Networks (GANs): This architecture involves two competing neural networks: a generator and a discriminator. The generator&#x2019;s primary objective is to create molecular structures that the discriminator cannot distinguish from genuine molecules. The first example in generative drug design was ORGANIC (<xref ref-type="bibr" rid="B66">66</xref>) and other ones were rapidly developed, such as ATNC (<xref ref-type="bibr" rid="B67">67</xref>) and MolGAN (<xref ref-type="bibr" rid="B68">68</xref>) which improved the rate of valid molecules generated. These methodologies have been applied especially in the field of molecular optimization (<xref ref-type="bibr" rid="B69">69</xref>&#x2013;<xref ref-type="bibr" rid="B71">71</xref>).</p>
</list-item>
<list-item>
<p>Transformer-based Models: Drawing inspiration from natural language models like BERT, transformers view molecules as sequences of tokens. These models typically employ a string-based molecular representation. A notable example is ChemBERTa (<xref ref-type="bibr" rid="B72">72</xref>). Transformers have also shown utility in suggesting structural modifications during the optimization phases of drug design (<xref ref-type="bibr" rid="B73">73</xref>, <xref ref-type="bibr" rid="B74">74</xref>).</p>
</list-item>
</list>
</sec>
<sec id="s3_4">
<title>Rule-based techniques</title>
<p>Rule-based techniques, formulated before trained generative algorithms, provide a structured approach to exploring the chemical space. Combinatorial chemistry principles are used to construct a well-defined chemical space and a set of traversal rules for these techniques. Combinatorial chemistry involves the creation of molecules by combining molecular substructures or atoms based on human-defined rules. These methodologies differ according to the rules employed in the combinatorial process and the exploratory algorithms used to select molecules to analyze from these potentially extensive sets.</p>
<p>The chemical space can be defined in several ways:</p>
<list list-type="order">
<list-item>
<p>Valence Rules: This approach focuses on combinations of atoms adhering to valence rules. An example is GDB-17 (<xref ref-type="bibr" rid="B55">55</xref>), which lists all organic molecules with up to 17 atoms of C, N, O, and S.</p>
</list-item>
<list-item>
<p>Fragment Spaces: Techniques like BRICS (<xref ref-type="bibr" rid="B75">75</xref>) and RECAP (<xref ref-type="bibr" rid="B76">76</xref>) employ sets of small molecular fragments that are linked together through a set of manually defined rules.</p>
</list-item>
<list-item>
<p>Synthon Method: Proposed in 2007 (<xref ref-type="bibr" rid="B77">77</xref>), it uses generalized reactions on available reagents to enumerate synthetically accessible molecules. Notable methodologies include DOGS (<xref ref-type="bibr" rid="B78">78</xref>), Synth-On (<xref ref-type="bibr" rid="B79">79</xref>), and pre-compiled commercial libraries (<xref ref-type="bibr" rid="B80">80</xref>, <xref ref-type="bibr" rid="B81">81</xref>).</p>
</list-item>
<list-item>
<p>Definition of Substituents: This method involves defining a set of substituents that will be applied to a molecular scaffold, similar to traditional scaffold decoration approaches.</p>
</list-item>
<list-item>
<p>Matched Molecular Pairs Analysis: First proposed in 2005 (<xref ref-type="bibr" rid="B82">82</xref>), it utilizes a catalog of structural transformations known to enhance molecular properties. It is useful in lead optimization (<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B83">83</xref>) and enhancing ADMET properties (<xref ref-type="bibr" rid="B84">84</xref>, <xref ref-type="bibr" rid="B85">85</xref>).</p>
</list-item>
</list>
<p>The main strategies employed to explore the defined chemical space can be grouped into two categories:</p>
<list list-type="order">
<list-item>
<p>Random Generation/Mutation: This approach often uses molecular string representations to either create entirely new molecules or iteratively change existing ones. An example is the STONED algorithm (<xref ref-type="bibr" rid="B34">34</xref>).</p>
</list-item>
<list-item>
<p>Evolutionary Molecular Modification: These methods begin with a molecule and apply atomic or fragment modifications using evolutionary algorithms. They are particularly effective for scaffold hopping or decoration. Examples include GB-GA (<xref ref-type="bibr" rid="B86">86</xref>) and EvoMol (<xref ref-type="bibr" rid="B87">87</xref>), which use valence rules. CoG (<xref ref-type="bibr" rid="B39">39</xref>) uses fragment spaces, while SYNOPSIS (<xref ref-type="bibr" rid="B40">40</xref>, <xref ref-type="bibr" rid="B41">41</xref>) and GALILEO (<xref ref-type="bibr" rid="B31">31</xref>) are based on synthon spaces.</p>
</list-item>
</list>
</sec>
<sec id="s3_5">
<title>Active learning</title>
<p>Active Learning (AL) is a novel introduction to the field of <italic>de novo</italic> drug design for the exploration of chemical space. AL operates on the principle that models can achieve higher accuracy with fewer data by intelligently selecting their training samples. Central to AL is the concept of uncertainty. AL addresses epistemic uncertainty, which arises from the model&#x2019;s lack of knowledge in certain areas of the modeled space (<xref ref-type="bibr" rid="B88">88</xref>). For instance, an unfamiliar molecular structure can increase uncertainty in predictions due to insufficient knowledge in that area. AL reduces uncertainty by adding new data to the training set.</p>
<p>AL has traditionally been used to improve model predictions, but is now also used to explore the chemical space. It identifies and investigates molecules linked with high uncertainty, assumed to be those presenting less-explored molecular structures. By doing so, AL enables more efficient exploration with fewer experiments. These approaches employ AI techniques like Bayesian machine learning algorithms and ensemble learning to estimate uncertainty. The process is iterative: it identifies molecules with high uncertainty, tests them, and adds the results to the training set. This results in the refinement of the model and the expansion of the training set, which is effectively the set of screened compounds. Predictions from the model are generally discarded, as the method is primarily used to explore and not predict. Unlike the methods previously outlined, this methodology is purely explorative and does not generate novel structures; the structures for this exploration can come from different sources, such as databases, combinatorial chemistry, or generative algorithms. Strategies that include molecules predicted to have desirable properties can be used to combine the exploration and optimization steps. In fact, it has been shown that purely exploratory approaches effectively explore the chemical space but do not identify many active compounds (<xref ref-type="bibr" rid="B89">89</xref>). A schematic representation of the process using AL is shown in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Iterative selection process through active learning.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frhem-03-1305741-g003.tif"/>
</fig>
<p>However, AL requires regular generation of new data, making it more suited for virtual rather than experimental screening. While these strategies do not improve the accuracy of the prediction, they optimize the quantity of tested compounds, making it possible to screen larger libraries. Despite being better suited to virtual screening, AL has also been applied in the context of automated experimental testing (<xref ref-type="bibr" rid="B90">90</xref>).</p>
<p>The application of AL to drug discovery has been explored since the early 2000s, but most of these studies were retrospective in nature (<xref ref-type="bibr" rid="B91">91</xref>). The focus has only recently shifted towards identifying and evaluating specific AL techniques that could be effectively integrated into the drug discovery process (<xref ref-type="bibr" rid="B33">33</xref>), but it is still an emerging field with slow and limited adoption (<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B89">89</xref>). Three experimental high-throughput screening results datasets were used to demonstrate the potential of AL, showing a sixfold increase in Enrichment Factor over traditional virtual screening methods (<xref ref-type="bibr" rid="B89">89</xref>).</p>
</sec>
</sec>
<sec id="s4">
<title>Virtual screening</title>
<p>Virtual screening is a key technique in drug design used to assess and prioritize molecular candidates. It uses principles from chemistry, statistics, and empirical data to predict how molecules will behave in terms of activity, toxicity, and suitability as drugs.</p>
<p>Since it&#x2019;s expensive and complex to synthesize and test every molecule created by AI, virtual screening is essential. It helps focus on the most promising molecules, making drug discovery more efficient. These methods also estimate molecular properties for machine learning training when experimental data is scarce. However, they can&#x2019;t replace actual laboratory tests. When generative algorithms are trained exclusively on computed data, their performance is governed by the accuracy of the underlying theoretical models. It is advisable to use experimental data to train algorithms and to use computational techniques to filter and prioritize whenever feasible.</p>
<p>In the early stages of drug discovery, virtual screening is crucial for identifying diverse and unique molecules rather than just focusing on similar, high-affinity compounds.</p>
<sec id="s4_1">
<title>Rule-based filtering</title>
<p>Rule-based filters serve as rapid evaluation systems to categorize molecules based on predetermined criteria related to properties like bioactivity or synthetic feasibility. These systems typically use two-dimensional molecular structures to pinpoint promising candidates and weed out potential &#x201c;false positives&#x201d;&#x2014;molecules that initially appear viable but fail in later testing stages. Tools like Lipinski&#x2019;s Rule of 5 (<xref ref-type="bibr" rid="B92">92</xref>) assess oral bioavailability, while the QED metric (<xref ref-type="bibr" rid="B93">93</xref>) estimates therapeutic potential. Structural filters, such as PAINS (<xref ref-type="bibr" rid="B94">94</xref>) and BRENK (<xref ref-type="bibr" rid="B95">95</xref>), identify molecules that could interfere with bioassays or prove unsuitable as lead candidates. Additionally, instruments like the Synthetic Accessibility Score (<xref ref-type="bibr" rid="B96">96</xref>) and automated retrosynthetic analysis (<xref ref-type="bibr" rid="B97">97</xref>&#x2013;<xref ref-type="bibr" rid="B99">99</xref>) aid in eliminating hard-to-synthesize molecules. However, it&#x2019;s important to recognize that while these methods are broadly useful, they are primarily tailored for known drugs and targets. This specialization can limit their applicability in assessing novel structures (<xref ref-type="bibr" rid="B100">100</xref>).</p>
</sec>
<sec id="s4_2">
<title>Ligand-based virtual screening</title>
<p>Ligand-based virtual screening, employed in drug discovery when a target&#x2019;s structure is unknown, is computationally simple and identifies ligands with similar binding characteristics. Its effectiveness hinges on the availability of known ligands interacting with the target, a limitation when such data is scarce. In <italic>de novo</italic> design, focusing on structurally similar compounds is less preferred and as such these methodologies are often not appropriate.</p>
<p>Key techniques in this approach include Quantitative Structure-Activity Relationship (QSAR), which correlates molecular structure with biological activity, aiding in predicting the behavior of new molecules in terms of activity and toxicity (<xref ref-type="bibr" rid="B62">62</xref>, <xref ref-type="bibr" rid="B101">101</xref>, <xref ref-type="bibr" rid="B102">102</xref>). Pharmacophore Modeling identifies essential molecular features for biological activity, using known ligands to create models for finding new molecules with similar characteristics (<xref ref-type="bibr" rid="B31">31</xref>, <xref ref-type="bibr" rid="B103">103</xref>, <xref ref-type="bibr" rid="B104">104</xref>). Structural Similarity assesses the resemblance between molecules, facilitating the discovery of new compounds similar to active ones, or exploring diverse molecular structures for further study (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B48">48</xref>, <xref ref-type="bibr" rid="B105">105</xref>).</p>
</sec>
<sec id="s4_3">
<title>Structure-based virtual screening</title>
<p>Structure-based virtual screening is a technique in drug discovery that uses the 3D structures of both the drug molecule and its target. It can use either experimentally determined structures or accurate computational models of the target, made through methods like homology modeling or AI tools like AlphaFold (<xref ref-type="bibr" rid="B106">106</xref>, <xref ref-type="bibr" rid="B107">107</xref>). This approach can be useful even without data on binding ligands.</p>
<p>Molecular Docking is commonly used in structure-based virtual screening to predict binding modes (<xref ref-type="bibr" rid="B108">108</xref>). This knowledge aids in evaluating the interaction&#x2019;s quality and provides comparative insights based on other known ligands (<xref ref-type="bibr" rid="B109">109</xref>&#x2013;<xref ref-type="bibr" rid="B111">111</xref>). Molecular simulations, typically conducted through Molecular Dynamics or Monte Carlo techniques, aim to predict the structural, thermodynamic, and kinetic attributes of the molecule-target interaction (<xref ref-type="bibr" rid="B112">112</xref>). These simulations can be computationally demanding, making them most effective when focused on a refined list of candidates. One common application is the estimation of the Free Energy of binding through methods like Alchemical Free Energy, Thermodynamic integration or Free Energy Perturbation (<xref ref-type="bibr" rid="B113">113</xref>).</p>
</sec>
</sec>
<sec id="s5">
<title>Experimental assays in drug development</title>
<p>Drug development involves several experimental assays to test a drug&#x2019;s effectiveness, selectivity, and safety. These tests, essential at every stage of development, fall into three main categories: biochemical, biophysical, and cell-based (<xref ref-type="bibr" rid="B114">114</xref>, <xref ref-type="bibr" rid="B115">115</xref>).</p>
<p>Biochemical Assays: Used early in drug development, these tests measure how a drug interacts with its target at a molecular level by measuring its effects on function, often using indicators like fluorescent signals. Common types include FRET and AlphaScreen. They are cost-effective and suitable for large-scale screening, but need follow-up tests for confirmation.</p>
<p>Biophysical Assays: These assays focus on the direct physical interaction between a drug and its target rather than functional outcomes. Techniques like X-ray crystallography and NMR are used here. They are more resource-intensive but provide detailed information about the interaction, helping to refine drug candidates (<xref ref-type="bibr" rid="B115">115</xref>).</p>
<p>Cell-Based Assays: Conducted throughout drug development, these tests assess a drug&#x2019;s effect within living cells. They are crucial for understanding a drug&#x2019;s overall impact, including its ability to enter cells and potential side effects. Techniques like cell viability and reporter gene assays are commonly used.</p>
<p>After these laboratory assays, promising drug candidates are then tested in animal models to further assess their efficacy and safety before moving on to human clinical trials.</p>
</sec>
<sec id="s6">
<title>Evaluating generative models</title>
<p>Evaluating the efficacy of generative models is essential to select the most suitable model for distinct tasks and to pave the way for innovations that outpace current constraints. A variety of metrics have been devised to serve this purpose.</p>
<p>Assessing how thoroughly a generative model explores the chemical space is central to its evaluation, with various metrics offering different perspectives on this crucial aspect. The Internal Diversity Metric gauges the structural diversity within generated compound collections, whereas the concept of richness focuses on counting the number of unique compounds produced (<xref ref-type="bibr" rid="B116">116</xref>). The Fr&#xe9;chet ChemNet Distance (<xref ref-type="bibr" rid="B117">117</xref>) provides numerical insights into the alignment of generated molecules with a target distribution, serving as an indicator of potential biases. Complementing these is the Coverage Score (<xref ref-type="bibr" rid="B118">118</xref>), which quantifies the model&#x2019;s ability to sample molecules from larger datasets. The #Circles Metric (<xref ref-type="bibr" rid="B119">119</xref>) takes a more comprehensive approach by examining the overlap in structural diversity between two chemical sets and assessing the impact of introducing new molecules to the sampling range. For a more detailed exploration of these and other methodologies, the work by Xie et&#xa0;al. (<xref ref-type="bibr" rid="B119">119</xref>) serves as a valuable resource.</p>
<p>Beyond metrics, benchmarking suites like GuacaMol (<xref ref-type="bibr" rid="B116">116</xref>) and MOSES (<xref ref-type="bibr" rid="B120">120</xref>) provide holistic evaluations by employing a range of these metrics across different tasks, which include both sampling of the chemical space and fine-tuning of specific physicochemical properties. Ciepli&#x144;ski et&#xa0;al. (<xref ref-type="bibr" rid="B121">121</xref>, <xref ref-type="bibr" rid="B122">122</xref>) have developed a framework that evaluates models based on their effectiveness in molecular docking simulations with protein targets, going beyond simple physicochemical assessments which are often insufficient when optimizing a drug structure. This approach has revealed limitations in current models, including the generation of improbable molecules and a tendency to underperform compared to the top molecules in public databases. Gao and Coley (<xref ref-type="bibr" rid="B97">97</xref>) have introduced an approach to assess the real-world synthesizability of generated molecular structures employing retrosynthetic analysis tools.</p>
</sec>
<sec id="s7">
<title>Experimentally validated approaches</title>
<p>In 2018, Merk et&#xa0;al. (<xref ref-type="bibr" rid="B123">123</xref>) set a landmark by using Generative AI to design and then synthesize and experimentally validate agonists for the nuclear receptors RXR&#x3b1;/&#x3b2;/&#x3b3; and PPAR&#x3b1;/&#x3b3;/&#x3b4;. They utilized a deep RNN, trained on a vast dataset of over 540,000 SMILES of bioactive compounds. This was fine-tuned using 25 fatty acid mimetics known for their agonistic activity. The system generated 1,000 molecular structures, with 93% being chemically valid and 90% being unique. Out of 49 computationally high-scoring compounds, five were synthesized, and four demonstrated promising bioactivities. However, these novel molecules resembled the training set compounds, highlighting the need for further innovation in the generation of diverse structures.</p>
<p>DDR1, a kinase implicated in cancer and inflammatory bowel disease, is a promising therapeutic target. Zhavoronkov et&#xa0;al. (<xref ref-type="bibr" rid="B37">37</xref>) demonstrated the power of deep learning in a &#x2018;hit-to-lead&#x2019; optimization campaign, identifying a DDR1 inhibitor in 46 days. By leveraging an existing DDR1 inhibitory molecular scaffold, they generated 30,000 structures by adjusting the substituent groups of this scaffold to enhance its pharmacological efficacy; these structures were then computationally screened. From this, six were synthesized based on their synthetic feasibility and target profile. Two showcased potent inhibitory and pharmacokinetic properties.</p>
<p>In 2022, Xiaoqin et&#xa0;al. (<xref ref-type="bibr" rid="B24">24</xref>) also utilized deep learning to design selective inhibitors for DDR1 using a generative scaffold decorator. Refining a scaffold active against FGFR but also interacting with DDR1, they generated over 19,000 molecular structures. After filtering and computational simulations, two compounds were synthesized. Both demonstrated significant anti-inflammatory activity in animal tests, with one emerging as the most selective DDR1 inhibitor to date. These studies underscore the potential of generative AI in accelerating &#x2018;hit-to-lead&#x2019; drug discovery campaigns.</p>
<sec id="s7_1">
<title>Applications in oncology and hematology</title>
<p>The application of <italic>de novo</italic> techniques in oncological drug design has primarily focused on kinase targets, largely due to the extensive research and abundant data availability (<xref ref-type="bibr" rid="B124">124</xref>). As AI methodologies advance, there&#x2019;s a growing expectation that these tools will be applied to lesser-studied targets. While these studies have yielded successful results, the molecules generated do not show significant structural or binding mode differences compared to known ligands. Consequently, <italic>de novo</italic> techniques have shown more success in drug optimization rather than in the discovery of entirely novel hits.</p>
<p>In 2021, Yu et&#xa0;al. (<xref ref-type="bibr" rid="B22">22</xref>) undertook a scaffold hopping campaign to identify novel structures for JAK1 inhibition. To enhance this process, they used a neural network with Graph-Based Variational Autoencoders, training the model on scaffolds derived from drug-like compounds and fine-tuning it with the structure and bioactivity of known kinase inhibitors. The objective was to hop through different scaffolds while retaining similar side chains, as these two were encoded separately into the model. With a known JAK1 inhibitor as the starting scaffold, they generated 30,000 molecular structures. These underwent a screening process considering physicochemical properties, structural alerts, and their alignment with established inhibitors. A QSAR model and molecular docking were used to prioritize 25 molecules. Seven of these were synthesized, all of which demonstrated JAK1 inhibitory potential in experimental trials. While the inhibitory activity of the identified molecules is significant, the authors did not conduct kinase screening tests to assess selectivity, and consequently it is difficult to appreciate the real value of the compounds.</p>
<p>In 2022, Jang et&#xa0;al. focused on FLT3 (<xref ref-type="bibr" rid="B125">125</xref>), a critical kinase in hematopoiesis. When mutated in acute myeloid leukemia (AML), it is often associated with adverse outcomes. Their approach involved enhancing the FLT3 selectivity of a previously active molecule against breast cancer cells, which was predicted to interact with FLT3. A deep learning generative model was trained on drug-like SMILES structures and fine-tuned with known FLT3 binders. The model generated over 10,000 structures. These were filtered, and the resulting molecules were ranked based on their binding affinities derived from Alchemical Free Energy calculations. The most promising compound was synthesized and tested in cellular cultures and on the protein. It showed affinity for the mutated FLT3 variant and inhibited FLT3-mutated AML cell proliferation. The study, however, has limitations: it leaned heavily on molecular docking simulations without experimental assays for kinase selectivity, and did not test on healthy cell lines, leaving potential toxicity unexplored. Additionally, the molecules produced strongly resembled known FLT3 inhibitors, questioning the novelty of molecules designed through this approach. The findings are promising, but comprehensive experimental validation is required.</p>
<p>In their 2023 study, Zhu et&#xa0;al. engaged in a &#x2018;hit-to-lead&#x2019; campaign targeting SIK2 (<xref ref-type="bibr" rid="B25">25</xref>). They utilized a proprietary AI system, with an existing inhibitor as the reference, to produce molecules with varied substituents. Each molecule, out of the 5,000 candidates, was docked onto the protein structure modelled by AlphaFold and evaluated on structural and protein interaction criteria. They were then grouped into 56 clusters based on structural similarities and hydrogen bond interactions. From each cluster, two molecules were synthesized and underwent <italic>in vitro</italic> and <italic>in vivo</italic> testing. One molecule stood out for its potent inhibitory activity and ideal ADMET properties.</p>
</sec>
</sec>
<sec id="s8" sec-type="discussion">
<title>Discussion</title>
<p>In this article, we have explored a range of Artificial Intelligence (AI) methodologies applied in drug design. Currently, the field is evolving, with these methodologies primarily applied in experimental settings. To date, the results, while promising in accelerating and standardizing drug optimization for known targets, have not substantially led to the creation of novel <italic>de novo</italic> structures with distinct inhibition patterns from existing inhibitors. The aspiration to simplify and automate drug design for new targets or novel targeting patterns remains largely unfulfilled. Despite the rapid development of these methodologies, there is still a significant journey ahead before we can identify a set of methodologies robust enough to be widely adopted as standard practices. This journey will necessitate extensive experimental validation of both current and future techniques. The initial positive results, however, do suggest a potential for broader adoption in the future. Trained generative algorithms have notably altered the landscape of <italic>de novo</italic> drug design. Initially received with skepticism, focusing more on theoretical potential than practical application, these techniques have gradually gained recognition. This is particularly evident with some AI-designed drugs progressing to clinical trials. Yet, their success has largely been confined to well-characterized protein targets, indicating a notable limitation in their ability to explore new or under-researched biological targets.</p>
<sec id="s8_1">
<title>Challenges and limitations</title>
<p>While the potential of AI to drive <italic>de novo</italic> molecular design is significant, this field faces several challenges. AI methodologies have proven successful in targeting well-studied proteins, yet advancements in the design of molecules significantly different from known ligands remain limited. Available methods typically optimize molecules within established chemical spaces, raising questions about their applicability to genuine <italic>de novo</italic> drug design. The challenge is determining whether the current methodologies need refinement, or if the approach itself is unsuitable for this type of problem.</p>
<p>A key area of focus has been trained generative algorithms, which have brought new life to <italic>de novo</italic> drug design. Their ability to explore chemical spaces beyond their training set structures is still a concern (<xref ref-type="bibr" rid="B122">122</xref>). Efforts are ongoing to expand the range of molecular structures these algorithms can explore (<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B126">126</xref>). Yet, it remains challenging to assess their effectiveness in spanning chemical space. There are metrics for comparing molecule sets, but no methodology currently enables the enumeration of the chemical space accessible to generative algorithms. Such a methodology would be crucial for comparing the diversity and breadth of chemical spaces generated by these algorithms with those from combinatorial chemistry and existing chemical databases.</p>
<p>Data availability is also a challenge for trained generative algorithms. The bias towards well-explored molecular structures and targets in currently available datasets can limit their ability to generate diverse and innovative molecular designs for new targets (<xref ref-type="bibr" rid="B11">11</xref>).</p>
<p>Another significant limitation is the uncertainty regarding the quantity of molecules that need to be sampled from these algorithms to find a promising hit. These algorithms are expected to guide experimental design, but there are no comprehensive benchmarks of their effectiveness. Their limited application in large-scale hit discovery programs makes it difficult to assess their suitability for extensive <italic>de novo</italic> drug discovery and the resources needed. These methods have the potential to streamline the hit discovery process and reduce the number of necessary tests. However, this efficiency must be balanced against the higher synthetic efforts required for molecules generated through these algorithms, as opposed to those identified via high-throughput screening from commercial compound libraries. While advancements in synthesizability have been made, the necessity for custom synthesis will always persist. It is important to determine the extent to which these algorithms can reduce testing needs. Virtual screening can play a significant role in filtering compounds. This process predominantly depends on structure-based methods, as ligand-based methods are less effective outside well-characterized chemical spaces. The drawback of this reliance is the increased demand for computational resources, which exceeds that of traditional virtual screening methods. This raises questions about the circumstances under which these novel AI methodologies may offer advantages over established techniques. Determining the right balance between computational effort and the efficiency of hit discovery remains a challenge.</p>
<p>The synthesizability of molecules generated by AI algorithms remains a challenging issue (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B97">97</xref>, <xref ref-type="bibr" rid="B124">124</xref>). Despite this, significant progress has been made towards addressing this problem. New algorithms that can generate molecules more amenable to synthesis, and improved methods for evaluating synthesizability, are expected to mitigate this limitation in the near future.</p>
<p>Rule-based systems offer an advantage over trained generative techniques in that they do not require training data and allow the enumeration of the explorable chemical space. Despite their longstanding presence, these systems have been primarily used to optimize hits or leads, with limited application in full <italic>de novo</italic> design. The success of these methods depends on the formulation of effective rules for generation. These rules must not only ensure synthesizability, but also ensure the creation of diverse chemical spaces. Recent progress in this area involves generating synthon spaces from a set of generic reactions, potentially leading to more easily synthesizable molecules. The scarcity of publicly available reaction sets poses limitations, and it is unclear how much these methods improve synthesizability over other approaches.</p>
<p>Active Learning (AL) shows promise in optimizing virtual screenings, but integrating experimental data with computational results is a problem yet to be resolved. A methodology allowing such integration would enable a more targeted search process.</p>
<p>Another focus area is multi-objective optimization, which is crucial for developing effective and safe drugs. The ligands that bind effectively to the target must also have desirable physicochemical properties, selectivity, and an acceptable ADMET profile. Advances are being made in this field, using multi-objective optimization techniques from other disciplines, raising hopes for a solution in the near future.</p>
<p>Finally, the widespread application of these methodologies is hampered by their limited diffusion and limited ease of use. <italic>De novo</italic> design requires an integrated approach combining computational science, chemistry, and biology. Many models lack this interdisciplinary aspect and are therefore computationally sophisticated but less practical from a chemical or biological standpoint. This gap restricts their utility, especially for experimental chemists who require user-friendly tools. Some commercial software packages offer intuitive interfaces, but their use is still limited by high costs and the need for extensive computational knowledge (<xref ref-type="bibr" rid="B127">127</xref>, <xref ref-type="bibr" rid="B128">128</xref>).</p>
</sec>
<sec id="s8_2">
<title>Future directions</title>
<p>Methodologies for <italic>de novo</italic> drug design utilizing AI have been developed, yet their application in large-scale campaigns remains limited. The effectiveness of these methods compared to traditional approaches is still an open question. Therefore, a critical development area involves devising metrics to compare these AI methodologies against conventional methods and using <italic>de novo</italic> design in larger drug development campaigns.</p>
<p>Advancements in broadening the chemical space accessible to trained generative algorithms are being made (<xref ref-type="bibr" rid="B126">126</xref>). Future progress will be key in assessing whether these algorithms can create novel molecules distinct from their training data. This will clarify if such limitations are inherent to these methods or if they can be surpassed for more innovative molecular design. The capability of these algorithms to generate novel structures will significantly indicate their flexibility and influence in drug discovery.</p>
<p>Active Learning (AL) emerges as a potent tool in AI-based <italic>de novo</italic> drug design. Primarily used in compound screening, AL&#x2019;s potential extends to optimizing identified hits&#x2014;an area ripe for exploration. These methods could potentially replace evolutionary algorithms in rule-based systems, emphasizing the generation of unique or underexplored molecules. Currently, ensemble methods dominate AL, but integrating Bayesian models, renowned for their efficacy (<xref ref-type="bibr" rid="B88">88</xref>), could significantly enhance the exploration of chemical spaces.</p>
<p>Transitioning to the next phase of development, hybrid systems that merge exploratory and trained methodologies present a promising path for comprehensive chemical space exploration. Rule-based systems efficiently probe molecular structures akin to identified hits. In contrast, trained generative algorithms have proven effective in refining structures within explored chemical spaces. A cohesive <italic>de novo</italic> drug generation pipeline might first employ AL for rapid hit discovery, followed by rule-based systems for initial optimization. Subsequently, trained generative algorithms could refine these hits further, offering a layered approach composed of broad initial scans followed by focused lead tuning. This integrated strategy could substantially advance <italic>de novo</italic> drug design capabilities.</p>
<p>Furthermore, retrosynthetic analysis tools are advancing, particularly in addressing synthesizability challenges. Improvements in these tools may lessen the emphasis on generating only synthetically feasible molecules, paving the way for more creative and expansive molecular design.</p>
</sec>
</sec>
<sec id="s9" sec-type="conclusion">
<title>Conclusion</title>
<p>Artificial Intelligence (AI) holds potential to significantly impact <italic>de novo</italic> drug design, yet it&#x2019;s important to approach its current state and future prospects with a balanced view. The challenges outlined in this review are critical, and addressing them is key to harnessing AI&#x2019;s full potential in this field. While we anticipate improvements in the efficiency and effectiveness of drug development through AI, it&#x2019;s important to recognize that these advancements will be gradual and contingent upon overcoming significant hurdles.</p>
<p>AI methodologies have introduced noteworthy changes in drug design, including improvements in algorithmic efficiency and the exploration of new chemical spaces. However, it&#x2019;s crucial to note that these advancements are still in their nascent stages. The transformative impact of AI on drug design is promising, but it is not without its complexities and limitations.</p>
<p>Our discussion has covered a range of AI methodologies, exploring their integration into the drug discovery process and highlighting both their strengths and weaknesses. We have provided examples of AI applications in drug design, demonstrating the rapid development in this area. However, it&#x2019;s evident that most AI algorithms have found more success in optimizing molecular structures than in groundbreaking <italic>de novo</italic> design.</p>
<p>Looking to the future, the role of AI in drug design appears promising, but its trajectory is not without uncertainty. Developments in algorithmic sophistication and application breadth are anticipated, yet they will likely face challenges in surpassing the current limitations. As the field evolves, it is hoped that AI will not only supplement but also enhance traditional drug design methodologies. However, this evolution will require a careful and considered approach, ensuring that new technologies are robust, reliable, and truly beneficial in the quest for novel therapeutic solutions.</p>
</sec>
<sec id="s10" sec-type="author-contributions">
<title>Author contributions</title>
<p>DC: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. CP: Visualization, Writing &#x2013; review &amp; editing. JA: Writing &#x2013; review &amp; editing. DF: Writing &#x2013; review &amp; editing. AM: Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s11" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This research was supported by the grant 'JI-2022-Astrazenca-2 Germline Predisposition to Myelodysplastic Syndromes in Adults: Expanding Diagnostic Precision and Evidence of Pathogenicity'.</p>
</sec>
<sec id="s12" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s13" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>DiMasi</surname> <given-names>JA</given-names>
</name>
<name>
<surname>Grabowski</surname> <given-names>HG</given-names>
</name>
<name>
<surname>Hansen</surname> <given-names>RW</given-names>
</name>
</person-group>. <article-title>Innovation in the pharmaceutical industry: New estimates of R&amp;D costs</article-title>. <source>J Health Economics</source> (<year>2016</year>) <volume>47</volume>:<fpage>20</fpage>&#x2013;<lpage>33</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jhealeco.2016.01.012</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="book">
<person-group person-group-type="editor">
<name>
<surname>Clark</surname> <given-names>DE</given-names>
</name>
</person-group> ed. <source>Evolutionary algorithms in molecular design</source>. <edition>1st</edition>. <publisher-loc>Weinheim, Federal Republic of Germany</publisher-loc>: <publisher-name>Wiley</publisher-name> (<year>2000</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1002/9783527613168</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moon</surname> <given-names>JB</given-names>
</name>
<name>
<surname>Howe</surname> <given-names>WJ</given-names>
</name>
</person-group>. <article-title>3D database searching and <italic>de novo</italic> construction methods in molecular design</article-title>. <source>Tetrahedron Comput Method</source> (<year>1990</year>) <volume>3</volume>:<fpage>697</fpage>&#x2013;<lpage>711</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0898-5529(90)90168-8</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lipinski</surname> <given-names>CA</given-names>
</name>
</person-group>. <article-title>Overview of hit to lead: the medicinal chemist&#x2019;s role from HTS retest to lead optimization hand off</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Hayward</surname> <given-names>MM</given-names>
</name>
<name>
<surname>Bikker</surname> <given-names>JA</given-names>
</name>
<name>
<surname>Ellingboe</surname> <given-names>JW</given-names>
</name>
<name>
<surname>Freeman-Cook</surname> <given-names>KD</given-names>
</name>
<name>
<surname>Gilbert</surname> <given-names>AM</given-names>
</name>
<name>
<surname>Harrison</surname> <given-names>RK</given-names>
</name>
<etal/>
</person-group>, editors. <source>Lead-seeking approaches</source>. <publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer Berlin Heidelberg</publisher-name> (<year>2009</year>). p. <fpage>1</fpage>&#x2013;<lpage>24</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/7355_2009_4</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>G&#xf3;mez-Bombarelli</surname> <given-names>R</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>JN</given-names>
</name>
<name>
<surname>Duvenaud</surname> <given-names>D</given-names>
</name>
<name>
<surname>Hern&#xe1;ndez-Lobato</surname> <given-names>JM</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Lengeling</surname> <given-names>B</given-names>
</name>
<name>
<surname>Sheberla</surname> <given-names>D</given-names>
</name>
<etal/>
</person-group>. <article-title>Automatic chemical design using a data-driven continuous representation of molecules</article-title>. <source>ACS Cent Sci</source> (<year>2018</year>) <volume>4</volume>:<page-range>268&#x2013;76</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acscentsci.7b00572</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Segler</surname> <given-names>MHS</given-names>
</name>
<name>
<surname>Kogej</surname> <given-names>T</given-names>
</name>
<name>
<surname>Tyrchan</surname> <given-names>C</given-names>
</name>
<name>
<surname>Waller</surname> <given-names>MP</given-names>
</name>
</person-group>. <article-title>Generating focused molecule libraries for drug discovery with recurrent neural networks</article-title>. <source>ACS Cent Sci</source> (<year>2018</year>) <volume>4</volume>:<page-range>120&#x2013;31</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acscentsci.7b00512</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martinelli</surname> <given-names>DD</given-names>
</name>
</person-group>. <article-title>Generative machine learning for <italic>de novo</italic> drug discovery: A systematic review</article-title>. <source>Comput Biol Med</source> (<year>2022</year>) <volume>145</volume>:<fpage>105403</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.105403</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wigh</surname> <given-names>DS</given-names>
</name>
<name>
<surname>Goodman</surname> <given-names>JM</given-names>
</name>
<name>
<surname>Lapkin</surname> <given-names>AA</given-names>
</name>
</person-group>. <article-title>A review of molecular representation in the age of machine learning</article-title>. <source>WIREs Comput Mol Sci</source> (<year>2022</year>):<elocation-id>e1603</elocation-id>. doi: <pub-id pub-id-type="doi">10.1002/wcms.1603</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mak</surname> <given-names>K-K</given-names>
</name>
<name>
<surname>Balijepalli</surname> <given-names>MK</given-names>
</name>
<name>
<surname>Pichika</surname> <given-names>MR</given-names>
</name>
</person-group>. <article-title>Success stories of AI in drug discovery - where do things stand</article-title>? <source>Expert Opin Drug Discov</source> (<year>2022</year>) <volume>17</volume>:<fpage>79</fpage>&#x2013;<lpage>92</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/17460441.2022.1985108</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="web">
<source>AI drug discovery: assessing the first AI-designed drug candidates to go into human clinical trials | CAS</source> (<year>2022</year>). Available at: <uri xlink:href="https://www.cas.org/resources/cas-insights/drug-discovery/ai-designed-drug-candidates">https://www.cas.org/resources/cas-insights/drug-discovery/ai-designed-drug-candidates</uri> (Accessed <access-date>September 8, 2023</access-date>).</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Volkamer</surname> <given-names>A</given-names>
</name>
<name>
<surname>Riniker</surname> <given-names>S</given-names>
</name>
<name>
<surname>Nittinger</surname> <given-names>E</given-names>
</name>
<name>
<surname>Lanini</surname> <given-names>J</given-names>
</name>
<name>
<surname>Grisoni</surname> <given-names>F</given-names>
</name>
<name>
<surname>Evertsson</surname> <given-names>E</given-names>
</name>
<etal/>
</person-group>. <article-title>Machine learning for small molecule drug discovery in academia and industry</article-title>. <source>Artif Intell Life Sci</source> (<year>2023</year>) <volume>3</volume>:<fpage>100056</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ailsci.2022.100056</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hughes</surname> <given-names>J</given-names>
</name>
<name>
<surname>Rees</surname> <given-names>S</given-names>
</name>
<name>
<surname>Kalindjian</surname> <given-names>S</given-names>
</name>
<name>
<surname>Philpott</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>Principles of early drug discovery</article-title>. <source>Br J Pharmacol</source> (<year>2011</year>) <volume>162</volume>:<page-range>1239&#x2013;49</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1476-5381.2010.01127.x</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Patrick</surname> <given-names>GL</given-names>
</name>
</person-group>. <source>An introduction to medicinal chemistry</source>. <edition>Fifth</edition>. <publisher-loc>Oxford</publisher-loc>: <publisher-name>Oxford University Press</publisher-name> (<year>2013</year>). <fpage>789 p</fpage>.</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Polishchuk</surname> <given-names>PG</given-names>
</name>
<name>
<surname>Madzhidov</surname> <given-names>TI</given-names>
</name>
<name>
<surname>Varnek</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Estimation of the size of drug-like chemical space based on GDB-17 data</article-title>. <source>J Comput Aided Mol Des</source> (<year>2013</year>) <volume>27</volume>:<page-range>675&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10822-013-9672-4</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Boehm</surname> <given-names>M</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>T-Y</given-names>
</name>
<name>
<surname>Claussen</surname> <given-names>H</given-names>
</name>
<name>
<surname>Lemmen</surname> <given-names>C</given-names>
</name>
</person-group>. <article-title>Similarity searching and scaffold hopping in synthetically accessible combinatorial chemistry spaces</article-title>. <source>J Med Chem</source> (<year>2008</year>) <volume>51</volume>:<page-range>2468&#x2013;80</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/jm0707727</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bajusz</surname> <given-names>D</given-names>
</name>
<name>
<surname>R&#xe1;cz</surname> <given-names>A</given-names>
</name>
<name>
<surname>H&#xe9;berger</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>Why is Tanimoto index an appropriate choice for fingerprint-based similarity calculations</article-title>? <source>J Cheminform</source> (<year>2015</year>) <volume>7</volume>:<fpage>20</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13321-015-0069-3</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garcia-Hernandez</surname> <given-names>C</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez</surname> <given-names>A</given-names>
</name>
<name>
<surname>Serratosa</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>Ligand-based virtual screening using graph edit distance as molecular similarity measure</article-title>. <source>J Chem Inf Model</source> (<year>2019</year>) <volume>59</volume>:<page-range>1410&#x2013;21</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.8b00820</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bender</surname> <given-names>A</given-names>
</name>
<name>
<surname>Mussa</surname> <given-names>HY</given-names>
</name>
<name>
<surname>Glen</surname> <given-names>RC</given-names>
</name>
<name>
<surname>Reiling</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Similarity searching of chemical databases using atom environment descriptors (MOLPRINT 2D): evaluation of performance</article-title>. <source>J Chem Inf Comput Sci</source> (<year>2004</year>) <volume>44</volume>:<page-range>1708&#x2013;18</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/ci0498719</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raymond</surname> <given-names>JW</given-names>
</name>
<name>
<surname>Blankley</surname> <given-names>CJ</given-names>
</name>
<name>
<surname>Willett</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>Comparison of chemical clustering methods using graph- and fingerprint-based similarity measures</article-title>. <source>J Mol Graphics Model</source> (<year>2003</year>) <volume>21</volume>:<page-range>421&#x2013;33</page-range>. doi: <pub-id pub-id-type="doi">10.1016/S1093-3263(02)00188-2</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sch&#xfc;ller</surname> <given-names>A</given-names>
</name>
<name>
<surname>Suhartono</surname> <given-names>M</given-names>
</name>
<name>
<surname>Fechner</surname> <given-names>U</given-names>
</name>
<name>
<surname>Tanrikulu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Breitung</surname> <given-names>S</given-names>
</name>
<name>
<surname>Scheffer</surname> <given-names>U</given-names>
</name>
<etal/>
</person-group>. <article-title>The concept of template-based <italic>de novo</italic> design from drug-derived molecular fragments and its application to TAR RNA</article-title>. <source>J Comput Aided Mol Des</source> (<year>2008</year>) <volume>22</volume>:<fpage>59</fpage>&#x2013;<lpage>68</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10822-007-9157-4</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grisoni</surname> <given-names>F</given-names>
</name>
<name>
<surname>Merk</surname> <given-names>D</given-names>
</name>
<name>
<surname>Consonni</surname> <given-names>V</given-names>
</name>
<name>
<surname>Hiss</surname> <given-names>JA</given-names>
</name>
<name>
<surname>Tagliabue</surname> <given-names>SG</given-names>
</name>
<name>
<surname>Todeschini</surname> <given-names>R</given-names>
</name>
<etal/>
</person-group>. <article-title>Scaffold hopping from natural products to synthetic mimetics by holistic molecular similarity</article-title>. <source>Commun Chem</source> (<year>2018</year>) <volume>1</volume>:<fpage>44</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s42004-018-0043-x</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>T</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Rong</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Gong</surname> <given-names>Z</given-names>
</name>
<etal/>
</person-group>. <article-title>A novel scalarized scaffold hopping algorithm with graph-based variational autoencoder for discovery of JAK1 inhibitors</article-title>. <source>ACS Omega</source> (<year>2021</year>) <volume>6</volume>:<page-range>22945&#x2013;54</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acsomega.1c03613</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krueger</surname> <given-names>B</given-names>
</name>
<name>
<surname>Dietrich</surname> <given-names>A</given-names>
</name>
<name>
<surname>Baringhaus</surname> <given-names>K-H</given-names>
</name>
<name>
<surname>Schneider</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title>Scaffold-hopping potential of fragment-based <italic>de novo</italic> design: the chances and limits of variation</article-title>. <source>Comb Chem High Throughput Screen</source> (<year>2009</year>) <volume>12</volume>:<page-range>383&#x2013;96</page-range>. doi: <pub-id pub-id-type="doi">10.2174/138620709788167971</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tan</surname> <given-names>X</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>R</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>S</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X</given-names>
</name>
<etal/>
</person-group>. <article-title>Discovery of pyrazolo[3,4-d]pyridazinone derivatives as selective DDR1 inhibitors via deep learning based design, synthesis, and biological evaluation</article-title>. <source>J Med Chem</source> (<year>2022</year>) <volume>65</volume>:<page-range>103&#x2013;19</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.jmedchem.1c01205</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>W</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>F</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>T</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X</given-names>
</name>
<etal/>
</person-group>. <article-title>Discovery of novel and selective SIK2 inhibitors by the application of AlphaFold structures and generative models</article-title>. <source>Bioorganic Medicinal Chem</source> (<year>2023</year>) <volume>91</volume>:<fpage>117414</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.bmc.2023.117414</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Erlanson</surname> <given-names>DA</given-names>
</name>
</person-group>. <article-title>Introduction to fragment-based drug discovery</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Davies</surname> <given-names>TG</given-names>
</name>
<name>
<surname>Hyv&#xf6;nen</surname> <given-names>M</given-names>
</name>
</person-group>, editors. <source>Fragment-based drug discovery and X-ray crystallography</source>. <publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer Berlin Heidelberg</publisher-name> (<year>2011</year>). p. <fpage>1</fpage>&#x2013;<lpage>32</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/128_2011_180</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Turner</surname> <given-names>LD</given-names>
</name>
<name>
<surname>Trinh</surname> <given-names>CH</given-names>
</name>
<name>
<surname>Hubball</surname> <given-names>RA</given-names>
</name>
<name>
<surname>Orritt</surname> <given-names>KM</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>C-C</given-names>
</name>
<name>
<surname>Burns</surname> <given-names>JE</given-names>
</name>
<etal/>
</person-group>. <article-title>From fragment to lead: <italic>de novo</italic> design and development toward a selective FGFR2 inhibitor</article-title>. <source>J Med Chem</source> (<year>2022</year>) <volume>65</volume>:<page-range>1481&#x2013;504</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.jmedchem.1c01163</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Penner</surname> <given-names>P</given-names>
</name>
<name>
<surname>Martiny</surname> <given-names>V</given-names>
</name>
<name>
<surname>Bellmann</surname> <given-names>L</given-names>
</name>
<name>
<surname>Flachsenberg</surname> <given-names>F</given-names>
</name>
<name>
<surname>Gastreich</surname> <given-names>M</given-names>
</name>
<name>
<surname>Theret</surname> <given-names>I</given-names>
</name>
<etal/>
</person-group>. <article-title>FastGrow: on-the-fly growing and its application to DYRK1A</article-title>. <source>J Comput Aided Mol Des</source> (<year>2022</year>) <volume>36</volume>:<page-range>639&#x2013;51</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10822-022-00469-y</pub-id>
</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wills</surname> <given-names>S</given-names>
</name>
<name>
<surname>Sanchez-Garcia</surname> <given-names>R</given-names>
</name>
<name>
<surname>Dudgeon</surname> <given-names>T</given-names>
</name>
<name>
<surname>Roughley</surname> <given-names>SD</given-names>
</name>
<name>
<surname>Merritt</surname> <given-names>A</given-names>
</name>
<name>
<surname>Hubbard</surname> <given-names>RE</given-names>
</name>
<etal/>
</person-group>. <article-title>Fragment merging using a graph database samples different catalogue space than similarity search</article-title>. <source>J Chem Inf Model</source> (<year>2023</year>) <volume>63</volume>:<page-range>3423&#x2013;37</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.3c00276</pub-id>
</citation>
</ref>
<ref id="B30">
<label>30</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>D</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>H</given-names>
</name>
<etal/>
</person-group>. <article-title>Generative artificial intelligence and its applications in materials science: Current situation and future perspectives</article-title>. <source>J Materiomics</source> (<year>2023</year>) <volume>9</volume>:<fpage>798</fpage>&#x2013;<lpage>816</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jmat.2023.05.001</pub-id>
</citation>
</ref>
<ref id="B31">
<label>31</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meyenburg</surname> <given-names>C</given-names>
</name>
<name>
<surname>Dolfus</surname> <given-names>U</given-names>
</name>
<name>
<surname>Briem</surname> <given-names>H</given-names>
</name>
<name>
<surname>Rarey</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Galileo: Three-dimensional searching in large combinatorial fragment spaces on the example of pharmacophores</article-title>. <source>J Comput Aided Mol Des</source> (<year>2022</year>) <volume>37</volume>:<fpage>1</fpage>&#x2013;<lpage>16</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10822-022-00485-y</pub-id>
</citation>
</ref>
<ref id="B32">
<label>32</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ishitani</surname> <given-names>R</given-names>
</name>
<name>
<surname>Kataoka</surname> <given-names>T</given-names>
</name>
<name>
<surname>Rikimaru</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>Molecular design method using a reversible tree representation of chemical compounds and deep reinforcement learning</article-title>. <source>J Chem Inf Model</source> (<year>2022</year>) <volume>62</volume>:<page-range>4032&#x2013;48</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.2c00366</pub-id>
</citation>
</ref>
<ref id="B33">
<label>33</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Graff</surname> <given-names>DE</given-names>
</name>
<name>
<surname>Shakhnovich</surname> <given-names>EI</given-names>
</name>
<name>
<surname>Coley</surname> <given-names>CW</given-names>
</name>
</person-group>. <article-title>Accelerating high-throughput virtual screening through molecular pool-based active learning</article-title>. <source>Chem Sci</source> (<year>2021</year>) <volume>12</volume>:<page-range>7866&#x2013;81</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1039/d0sc06805e</pub-id>
</citation>
</ref>
<ref id="B34">
<label>34</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nigam</surname> <given-names>A</given-names>
</name>
<name>
<surname>Pollice</surname> <given-names>R</given-names>
</name>
<name>
<surname>Krenn</surname> <given-names>M</given-names>
</name>
<name>
<surname>dos Passos Gomes</surname> <given-names>G</given-names>
</name>
<name>
<surname>Aspuru-Guzik</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Beyond generative models: superfast traversal, optimization, novelty, exploration and discovery (STONED) algorithm for molecules using SELFIES</article-title>. <source>Chem Sci</source> (<year>2021</year>) <volume>12</volume>:<page-range>7079&#x2013;90</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1039/d1sc00231g</pub-id>
</citation>
</ref>
<ref id="B35">
<label>35</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Polishchuk</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>CReM: chemically reasonable mutations framework for structure generation</article-title>. <source>J Cheminform</source> (<year>2020</year>) <volume>12</volume>:<fpage>28</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13321-020-00431-w</pub-id>
</citation>
</ref>
<ref id="B36">
<label>36</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Imrie</surname> <given-names>F</given-names>
</name>
<name>
<surname>Bradley</surname> <given-names>AR</given-names>
</name>
<name>
<surname>van der Schaar</surname> <given-names>M</given-names>
</name>
<name>
<surname>Deane</surname> <given-names>CM</given-names>
</name>
</person-group>. <article-title>Deep generative models for 3D linker design</article-title>. <source>J Chem Inf Model</source> (<year>2020</year>) <volume>60</volume>:<page-range>1983&#x2013;95</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.9b01120</pub-id>
</citation>
</ref>
<ref id="B37">
<label>37</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhavoronkov</surname> <given-names>A</given-names>
</name>
<name>
<surname>Ivanenkov</surname> <given-names>YA</given-names>
</name>
<name>
<surname>Aliper</surname> <given-names>A</given-names>
</name>
<name>
<surname>Veselov</surname> <given-names>MS</given-names>
</name>
<name>
<surname>Aladinskiy</surname> <given-names>VA</given-names>
</name>
<name>
<surname>Aladinskaya</surname> <given-names>AV</given-names>
</name>
<etal/>
</person-group>. <article-title>Deep learning enables rapid identification of potent DDR1 kinase inhibitors</article-title>. <source>Nat Biotechnol</source> (<year>2019</year>) <volume>37</volume>:<page-range>1038&#x2013;40</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41587-019-0224-x</pub-id>
</citation>
</ref>
<ref id="B38">
<label>38</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jin</surname> <given-names>W</given-names>
</name>
<name>
<surname>Barzilay</surname> <given-names>R</given-names>
</name>
<name>
<surname>Jaakkola</surname> <given-names>T</given-names>
</name>
</person-group>. <source>Junction tree variational autoencoder for molecular graph generation</source>. [preprint]. (<year>2019</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1802.04364</pub-id>.</citation>
</ref>
<ref id="B39">
<label>39</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brown</surname> <given-names>N</given-names>
</name>
<name>
<surname>McKay</surname> <given-names>B</given-names>
</name>
<name>
<surname>Gilardoni</surname> <given-names>F</given-names>
</name>
<name>
<surname>Gasteiger</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>A graph-based genetic algorithm and its application to the multiobjective evolution of median molecules</article-title>. <source>J Chem Inf Comput Sci</source> (<year>2004</year>) <volume>44</volume>:<page-range>1079&#x2013;87</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/ci034290p</pub-id>
</citation>
</ref>
<ref id="B40">
<label>40</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vinkers</surname> <given-names>HM</given-names>
</name>
<name>
<surname>de Jonge</surname> <given-names>MR</given-names>
</name>
<name>
<surname>Daeyaert</surname> <given-names>FFD</given-names>
</name>
<name>
<surname>Heeres</surname> <given-names>J</given-names>
</name>
<name>
<surname>Koymans</surname> <given-names>LMH</given-names>
</name>
<name>
<surname>van Lenthe</surname> <given-names>JH</given-names>
</name>
<etal/>
</person-group>. <article-title>SYNOPSIS: SYNthesize and OPtimize system in silico</article-title>. <source>J Med Chem</source> (<year>2003</year>) <volume>46</volume>:<page-range>2765&#x2013;73</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/jm030809x</pub-id>
</citation>
</ref>
<ref id="B41">
<label>41</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Daeyaert</surname> <given-names>F</given-names>
</name>
<name>
<surname>Deem</surname> <given-names>MW</given-names>
</name>
</person-group>. <article-title>A pareto algorithm for efficient <italic>de novo</italic> design of multi-functional molecules</article-title>. <source>Mol Inf</source> (<year>2017</year>) <volume>36</volume>:<fpage>1600044</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/minf.201600044</pub-id>
</citation>
</ref>
<ref id="B42">
<label>42</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wiswesser</surname> <given-names>WJ</given-names>
</name>
</person-group>. <article-title>107 years of line-formula notations (1861&#x2013;1968)</article-title>. <source>J Chem Doc</source> (<year>1968</year>) <volume>8</volume>:<page-range>146&#x2013;50</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/c160030a007</pub-id>
</citation>
</ref>
<ref id="B43">
<label>43</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weininger</surname> <given-names>D</given-names>
</name>
</person-group>. <article-title>SMILES, a chemical language and information system. 1. Introduction to methodology and encoding rules</article-title>. <source>J Chem Inf Comput Sci</source> (<year>1988</year>) <volume>28</volume>:<page-range>31&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/ci00057a005</pub-id>
</citation>
</ref>
<ref id="B44">
<label>44</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>O&#x2019;Boyle</surname> <given-names>N</given-names>
</name>
<name>
<surname>Dalke</surname> <given-names>A</given-names>
</name>
</person-group>. <source>DeepSMILES: an adaptation of SMILES for use in machine-learning of chemical structures</source>. [preprint]. (<year>2018</year>), Chemistry. doi:&#xa0;<pub-id pub-id-type="doi">10.26434/chemrxiv.7097960.v1</pub-id>.</citation>
</ref>
<ref id="B45">
<label>45</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krenn</surname> <given-names>M</given-names>
</name>
<name>
<surname>H&#xe4;se</surname> <given-names>F</given-names>
</name>
<name>
<surname>Nigam</surname> <given-names>A</given-names>
</name>
<name>
<surname>Friederich</surname> <given-names>P</given-names>
</name>
<name>
<surname>Aspuru-Guzik</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Self-referencing embedded strings (SELFIES): A 100% robust molecular string representation</article-title>. <source>Mach Learn: Sci Technol</source> (<year>2020</year>) <volume>1</volume>:<fpage>045024</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/2632-2153/aba947</pub-id>
</citation>
</ref>
<ref id="B46">
<label>46</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kimber</surname> <given-names>TB</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Volkamer</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Deep learning in virtual screening: recent applications and developments</article-title>. <source>Int J Mol Sci</source> (<year>2021</year>) <volume>22</volume>:<fpage>4435</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijms22094435</pub-id>
</citation>
</ref>
<ref id="B47">
<label>47</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Todeschini</surname> <given-names>R</given-names>
</name>
<name>
<surname>Consonni</surname> <given-names>V</given-names>
</name>
</person-group>. <source>Handbook of molecular descriptors</source>. <edition>1st ed</edition>. <publisher-loc>Weinheim, Federal Republic of Germany</publisher-loc>: <publisher-name>Wiley</publisher-name>. (<year>2000</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1002/9783527613106</pub-id>
</citation>
</ref>
<ref id="B48">
<label>48</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cereto-Massagu&#xe9;</surname> <given-names>A</given-names>
</name>
<name>
<surname>Ojeda</surname> <given-names>MJ</given-names>
</name>
<name>
<surname>Valls</surname> <given-names>C</given-names>
</name>
<name>
<surname>Mulero</surname> <given-names>M</given-names>
</name>
<name>
<surname>Garcia-Vallv&#xe9;</surname> <given-names>S</given-names>
</name>
<name>
<surname>Pujadas</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title>Molecular fingerprint similarity search in virtual screening</article-title>. <source>Methods</source> (<year>2015</year>) <volume>71</volume>:<fpage>58</fpage>&#x2013;<lpage>63</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ymeth.2014.08.005</pub-id>
</citation>
</ref>
<ref id="B49">
<label>49</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mendez</surname> <given-names>D</given-names>
</name>
<name>
<surname>Gaulton</surname> <given-names>A</given-names>
</name>
<name>
<surname>Bento</surname> <given-names>AP</given-names>
</name>
<name>
<surname>Chambers</surname> <given-names>J</given-names>
</name>
<name>
<surname>De Veij</surname> <given-names>M</given-names>
</name>
<name>
<surname>F&#xe9;lix</surname> <given-names>E</given-names>
</name>
<etal/>
</person-group>. <article-title>ChEMBL: towards direct deposition of bioassay data</article-title>. <source>Nucleic Acids Res</source> (<year>2019</year>) <volume>47</volume>:<page-range>D930&#x2013;40</page-range>. doi: <pub-id pub-id-type="doi">10.1093/nar/gky1075</pub-id>
</citation>
</ref>
<ref id="B50">
<label>50</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname> <given-names>S</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>J</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>T</given-names>
</name>
<name>
<surname>Gindulyte</surname> <given-names>A</given-names>
</name>
<name>
<surname>He</surname> <given-names>J</given-names>
</name>
<name>
<surname>He</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>PubChem 2023 update</article-title>. <source>Nucleic Acids Res</source> (<year>2023</year>) <volume>51</volume>:<page-range>D1373&#x2013;80</page-range>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkac956</pub-id>
</citation>
</ref>
<ref id="B51">
<label>51</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gilson</surname> <given-names>MK</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>T</given-names>
</name>
<name>
<surname>Baitaluk</surname> <given-names>M</given-names>
</name>
<name>
<surname>Nicola</surname> <given-names>G</given-names>
</name>
<name>
<surname>Hwang</surname> <given-names>L</given-names>
</name>
<name>
<surname>Chong</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>BindingDB in 2015: A public database for medicinal chemistry, computational chemistry and systems pharmacology</article-title>. <source>Nucleic Acids Res</source> (<year>2016</year>) <volume>44</volume>:<page-range>D1045&#x2013;53</page-range>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkv1072</pub-id>
</citation>
</ref>
<ref id="B52">
<label>52</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wishart</surname> <given-names>DS</given-names>
</name>
<name>
<surname>Knox</surname> <given-names>C</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>AC</given-names>
</name>
<name>
<surname>Shrivastava</surname> <given-names>S</given-names>
</name>
<name>
<surname>Hassanali</surname> <given-names>M</given-names>
</name>
<name>
<surname>Stothard</surname> <given-names>P</given-names>
</name>
<etal/>
</person-group>. <article-title>DrugBank: a comprehensive resource for in silico drug discovery and exploration</article-title>. <source>Nucleic Acids Res</source> (<year>2006</year>) <volume>34</volume>:<page-range>D668&#x2013;72</page-range>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkj067</pub-id>
</citation>
</ref>
<ref id="B53">
<label>53</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pence</surname> <given-names>HE</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>ChemSpider: an online chemical information resource</article-title>. <source>J Chem Educ</source> (<year>2010</year>) <volume>87</volume>:<page-range>1123&#x2013;4</page-range>. doi: <pub-id pub-id-type="doi">10.1021/ed100697w</pub-id>
</citation>
</ref>
<ref id="B54">
<label>54</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Williams</surname> <given-names>AJ</given-names>
</name>
<name>
<surname>Grulke</surname> <given-names>CM</given-names>
</name>
<name>
<surname>Edwards</surname> <given-names>J</given-names>
</name>
<name>
<surname>McEachran</surname> <given-names>AD</given-names>
</name>
<name>
<surname>Mansouri</surname> <given-names>K</given-names>
</name>
<name>
<surname>Baker</surname> <given-names>NC</given-names>
</name>
<etal/>
</person-group>. <article-title>The CompTox Chemistry Dashboard: a community data resource for environmental chemistry</article-title>. <source>J Cheminform</source> (<year>2017</year>) <volume>9</volume>:<fpage>61</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13321-017-0247-6</pub-id>
</citation>
</ref>
<ref id="B55">
<label>55</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ruddigkeit</surname> <given-names>L</given-names>
</name>
<name>
<surname>van Deursen</surname> <given-names>R</given-names>
</name>
<name>
<surname>Blum</surname> <given-names>LC</given-names>
</name>
<name>
<surname>Reymond</surname> <given-names>J-L</given-names>
</name>
</person-group>. <article-title>Enumeration of 166 billion organic small molecules in the chemical universe database GDB-17</article-title>. <source>J Chem Inf Model</source> (<year>2012</year>) <volume>52</volume>:<page-range>2864&#x2013;75</page-range>. doi: <pub-id pub-id-type="doi">10.1021/ci300415d</pub-id>
</citation>
</ref>
<ref id="B56">
<label>56</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ramakrishnan</surname> <given-names>R</given-names>
</name>
<name>
<surname>Dral</surname> <given-names>PO</given-names>
</name>
<name>
<surname>Rupp</surname> <given-names>M</given-names>
</name>
<name>
<surname>von Lilienfeld</surname> <given-names>OA</given-names>
</name>
</person-group>. <article-title>Quantum chemistry structures and properties of 134 kilo molecules</article-title>. <source>Sci Data</source> (<year>2014</year>) <volume>1</volume>:<fpage>140022</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/sdata.2014.22</pub-id>
</citation>
</ref>
<ref id="B57">
<label>57</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tingle</surname> <given-names>BI</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>KG</given-names>
</name>
<name>
<surname>Castanon</surname> <given-names>M</given-names>
</name>
<name>
<surname>Gutierrez</surname> <given-names>JJ</given-names>
</name>
<name>
<surname>Khurelbaatar</surname> <given-names>M</given-names>
</name>
<name>
<surname>Dandarchuluun</surname> <given-names>C</given-names>
</name>
<etal/>
</person-group>. <article-title>ZINC-22&#x2014;A free multi-billion-scale database of tangible compounds for ligand discovery</article-title>. <source>J Chem Inf Model</source> (<year>2023</year>). doi: <pub-id pub-id-type="doi">10.26434/chemrxiv-2022-82czl</pub-id>
</citation>
</ref>
<ref id="B58">
<label>58</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Berman</surname> <given-names>HM</given-names>
</name>
<etal/>
</person-group>. <article-title>The protein data bank</article-title>. <source>Nucleic Acids Res</source> (<year>2000</year>) <volume>28</volume>:<page-range>235&#x2013;42</page-range>. doi: <pub-id pub-id-type="doi">10.1093/nar/28.1.235</pub-id>
</citation>
</ref>
<ref id="B59">
<label>59</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<collab>The UniProt Consortium</collab>
<name>
<surname>Bateman</surname> <given-names>A</given-names>
</name>
<name>
<surname>Martin</surname> <given-names>M-J</given-names>
</name>
<name>
<surname>Orchard</surname> <given-names>S</given-names>
</name>
<name>
<surname>Magrane</surname> <given-names>M</given-names>
</name>
<name>
<surname>Ahmad</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>UniProt: the universal protein knowledgebase in 2023</article-title>. <source>Nucleic Acids Res</source> (<year>2023</year>) <volume>51</volume>:<page-range>D523&#x2013;31</page-range>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkac1052</pub-id>
</citation>
</ref>
<ref id="B60">
<label>60</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>D</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>X</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>X</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Y</given-names>
</name>
<etal/>
</person-group>. <article-title>TTD: Therapeutic Target Database describing target druggability information</article-title>. <source>Nucleic Acids Res</source> (<year>2024</year>) <volume>52</volume>(<issue>D1</issue>):<page-range>D1465&#x2013;77</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkad751</pub-id>
</citation>
</ref>
<ref id="B61">
<label>61</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Olivecrona</surname> <given-names>M</given-names>
</name>
<name>
<surname>Blaschke</surname> <given-names>T</given-names>
</name>
<name>
<surname>Engkvist</surname> <given-names>O</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H</given-names>
</name>
</person-group>. <article-title>Molecular <italic>de-novo</italic> design through deep reinforcement learning</article-title>. <source>J Cheminform</source> (<year>2017</year>) <volume>9</volume>:<fpage>48</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13321-017-0235-x</pub-id>
</citation>
</ref>
<ref id="B62">
<label>62</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>&#x160;&#xed;cho</surname> <given-names>M</given-names>
</name>
<name>
<surname>Luukkonen</surname> <given-names>S</given-names>
</name>
<name>
<surname>Van Den Maagdenberg</surname> <given-names>HW</given-names>
</name>
<name>
<surname>Schoenmaker</surname> <given-names>L</given-names>
</name>
<name>
<surname>B&#xe9;quignon</surname> <given-names>OJM</given-names>
</name>
<name>
<surname>Van Westen</surname> <given-names>GJP</given-names>
</name>
</person-group>. <article-title>DrugEx: deep learning models and tools for exploration of drug-like chemical space</article-title>. <source>J Chem Inf Model</source> (<year>2023</year>) <volume>63</volume>:<fpage>3629</fpage>&#x2013;<lpage>3636</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.jcim.3c00434</pub-id>
</citation>
</ref>
<ref id="B63">
<label>63</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>X</given-names>
</name>
<name>
<surname>Ye</surname> <given-names>K</given-names>
</name>
<name>
<surname>Van Vlijmen</surname> <given-names>HWT</given-names>
</name>
<name>
<surname>Emmerich</surname> <given-names>MTM</given-names>
</name>
<name>
<surname>IJzerman</surname> <given-names>AP</given-names>
</name>
<name>
<surname>Van Westen</surname> <given-names>GJP</given-names>
</name>
</person-group>. <article-title>DrugEx v2: <italic>de novo</italic> design of drug molecules by Pareto-based multi-objective reinforcement learning in polypharmacology</article-title>. <source>J Cheminform</source> (<year>2021</year>) <volume>13</volume>:<fpage>85</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13321-021-00561-9</pub-id>
</citation>
</ref>
<ref id="B64">
<label>64</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Drot&#xe1;r</surname> <given-names>P</given-names>
</name>
<name>
<surname>Jamasb</surname> <given-names>AR</given-names>
</name>
<name>
<surname>Day</surname> <given-names>B</given-names>
</name>
<name>
<surname>Cangea</surname> <given-names>C</given-names>
</name>
<name>
<surname>Li&#xf2;</surname> <given-names>P</given-names>
</name>
</person-group>. <source>Structure-aware generation of drug-like molecules</source>. [preprint]. (<year>2021</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2111.04107</pub-id>
</citation>
</ref>
<ref id="B65">
<label>65</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname> <given-names>C</given-names>
</name>
<name>
<surname>Krenn</surname> <given-names>M</given-names>
</name>
<name>
<surname>Eppel</surname> <given-names>S</given-names>
</name>
<name>
<surname>Aspuru-Guzik</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Deep molecular dreaming: inverse machine learning for <italic>de-novo</italic> molecular design and interpretability with surjective representations</article-title>. <source>Mach Learn: Sci Technol</source> (<year>2021</year>) <volume>2</volume>:<fpage>11</fpage>. doi: <pub-id pub-id-type="doi">10.1088/2632-2153/ac09d6</pub-id>
</citation>
</ref>
<ref id="B66">
<label>66</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Sanchez-Lengeling</surname> <given-names>B</given-names>
</name>
<name>
<surname>Outeiral</surname> <given-names>C</given-names>
</name>
<name>
<surname>Guimaraes</surname> <given-names>GL</given-names>
</name>
<name>
<surname>Aspuru-Guzik</surname> <given-names>A</given-names>
</name>
</person-group>. <source>Optimizing distributions over molecular space. An Objective-Reinforced Generative Adversarial Network for Inverse-design Chemistry (ORGANIC)</source>. [preprint]. (<year>2017</year>), Chemistry. doi:&#xa0;<pub-id pub-id-type="doi">10.26434/chemrxiv.5309668.v3</pub-id>
</citation>
</ref>
<ref id="B67">
<label>67</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Putin</surname> <given-names>E</given-names>
</name>
<name>
<surname>Asadulaev</surname> <given-names>A</given-names>
</name>
<name>
<surname>Vanhaelen</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Ivanenkov</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Aladinskaya</surname> <given-names>AV</given-names>
</name>
<name>
<surname>Aliper</surname> <given-names>A</given-names>
</name>
<etal/>
</person-group>. <article-title>Adversarial threshold neural computer for molecular <italic>de novo</italic> design</article-title>. <source>Mol Pharmaceutics</source> (<year>2018</year>) <volume>15</volume>:<page-range>4386&#x2013;97</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.molpharmaceut.7b01137</pub-id>
</citation>
</ref>
<ref id="B68">
<label>68</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>De Cao</surname> <given-names>N</given-names>
</name>
<name>
<surname>Kipf</surname> <given-names>T</given-names>
</name>
</person-group>. <source>MolGAN: An implicit generative model for small molecular graphs</source>. [preprint]. (<year>2018</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1805.11973</pub-id>
</citation>
</ref>
<ref id="B69">
<label>69</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maziarka</surname> <given-names>&#x141;</given-names>
</name>
<name>
<surname>Pocha</surname> <given-names>A</given-names>
</name>
<name>
<surname>Kaczmarczyk</surname> <given-names>J</given-names>
</name>
<name>
<surname>Rataj</surname> <given-names>K</given-names>
</name>
<name>
<surname>Danel</surname> <given-names>T</given-names>
</name>
<name>
<surname>Warcho&#x142;</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Mol-CycleGAN: a generative model for molecular optimization</article-title>. <source>J Cheminform</source> (<year>2020</year>) <volume>12</volume>:<fpage>2</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13321-019-0404-1</pub-id>
</citation>
</ref>
<ref id="B70">
<label>70</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jacobs</surname> <given-names>I</given-names>
</name>
<name>
<surname>Maragoudakis</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title><italic>De novo</italic> drug design using artificial intelligence applied on SARS-CoV-2 viral proteins ASYNT-GAN</article-title>. <source>BioChem</source> (<year>2021</year>) <volume>1</volume>:<fpage>36</fpage>&#x2013;<lpage>48</lpage>. doi: <pub-id pub-id-type="doi">10.3390/biochem1010004</pub-id>
</citation>
</ref>
<ref id="B71">
<label>71</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bai</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Tan</surname> <given-names>S</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>T</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>H</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>X</given-names>
</name>
</person-group>. <article-title>MolAICal: a soft tool for 3D drug design of protein targets by artificial intelligence and classical algorithm</article-title>. <source>Briefings Bioinf</source> (<year>2021</year>) <volume>22</volume>:<fpage>bbaa161</fpage>. doi: <pub-id pub-id-type="doi">10.1093/bib/bbaa161</pub-id>
</citation>
</ref>
<ref id="B72">
<label>72</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Ahmad</surname> <given-names>W</given-names>
</name>
<name>
<surname>Simon</surname> <given-names>E</given-names>
</name>
<name>
<surname>Chithrananda</surname> <given-names>S</given-names>
</name>
<name>
<surname>Grand</surname> <given-names>G</given-names>
</name>
<name>
<surname>Ramsundar</surname> <given-names>B</given-names>
</name>
</person-group>. <source>ChemBERTa-2: towards chemical foundation models</source> (<year>2022</year>). Available at: <uri xlink:href="http://arxiv.org/abs/2209.01712">http://arxiv.org/abs/2209.01712</uri> (Accessed <access-date>September 16, 2023</access-date>).</citation>
</ref>
<ref id="B73">
<label>73</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>J</given-names>
</name>
<name>
<surname>You</surname> <given-names>H</given-names>
</name>
<name>
<surname>Sandstr&#xf6;m</surname> <given-names>E</given-names>
</name>
<name>
<surname>Nittinger</surname> <given-names>E</given-names>
</name>
<name>
<surname>Bjerrum</surname> <given-names>EJ</given-names>
</name>
<name>
<surname>Tyrchan</surname> <given-names>C</given-names>
</name>
<etal/>
</person-group>. <article-title>Molecular optimization by capturing chemist&#x2019;s intuition using deep neural networks</article-title>. <source>J Cheminform</source> (<year>2021</year>) <volume>13</volume>:<fpage>26</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13321-021-00497-0</pub-id>
</citation>
</ref>
<ref id="B74">
<label>74</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tysinger</surname> <given-names>EP</given-names>
</name>
<name>
<surname>Rai</surname> <given-names>BK</given-names>
</name>
<name>
<surname>Sinitskiy</surname> <given-names>AV</given-names>
</name>
</person-group>. <article-title>Can we quickly learn to &#x201c;translate&#x201d; bioactive molecules with transformer models</article-title>? <source>J Chem Inf Model</source> (<year>2023</year>) <volume>63</volume>:<page-range>1734&#x2013;44</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.2c01618</pub-id>
</citation>
</ref>
<ref id="B75">
<label>75</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Degen</surname> <given-names>J</given-names>
</name>
<name>
<surname>Wegscheid-Gerlach</surname> <given-names>C</given-names>
</name>
<name>
<surname>Zaliani</surname> <given-names>A</given-names>
</name>
<name>
<surname>Rarey</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>On the art of compiling and using &#x201c;drug-like&#x201d; chemical fragment spaces</article-title>. <source>ChemMedChem</source> (<year>2008</year>) <volume>3</volume>:<page-range>1503&#x2013;7</page-range>. doi: <pub-id pub-id-type="doi">10.1002/cmdc.200800178</pub-id>
</citation>
</ref>
<ref id="B76">
<label>76</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lewell</surname> <given-names>XQ</given-names>
</name>
<name>
<surname>Judd</surname> <given-names>DB</given-names>
</name>
<name>
<surname>Watson</surname> <given-names>SP</given-names>
</name>
<name>
<surname>Hann</surname> <given-names>MM</given-names>
</name>
</person-group>. <article-title>RECAP&#x2014;retrosynthetic combinatorial analysis procedure: A powerful new technique for identifying privileged molecular fragments with useful applications in combinatorial chemistry</article-title>. <source>J Chem Inf Comput Sci</source> (<year>1998</year>) <volume>38</volume>:<page-range>511&#x2013;22</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/ci970429i</pub-id>
</citation>
</ref>
<ref id="B77">
<label>77</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cramer</surname> <given-names>RD</given-names>
</name>
<name>
<surname>Soltanshahi</surname> <given-names>F</given-names>
</name>
<name>
<surname>Jilek</surname> <given-names>R</given-names>
</name>
<name>
<surname>Campbell</surname> <given-names>B</given-names>
</name>
</person-group>. <article-title>AllChem: generating and searching 10<sup>20</sup> synthetically accessible structures</article-title>. <source>J Comput Aided Mol Des</source> (<year>2007</year>) <volume>21</volume>:<page-range>341&#x2013;50</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10822-006-9093-8</pub-id>
</citation>
</ref>
<ref id="B78">
<label>78</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hartenfeller</surname> <given-names>M</given-names>
</name>
<name>
<surname>Zettl</surname> <given-names>H</given-names>
</name>
<name>
<surname>Walter</surname> <given-names>M</given-names>
</name>
<name>
<surname>Rupp</surname> <given-names>M</given-names>
</name>
<name>
<surname>Reisen</surname> <given-names>F</given-names>
</name>
<name>
<surname>Proschak</surname> <given-names>E</given-names>
</name>
<etal/>
</person-group>. <article-title>DOGS: reaction-driven <italic>de novo</italic> design of bioactive compounds</article-title>. <source>PLoS Comput Biol</source> (<year>2012</year>) <volume>8</volume>:<elocation-id>e1002380</elocation-id>. doi: <pub-id pub-id-type="doi">10.1371/journal.pcbi.1002380</pub-id>
</citation>
</ref>
<ref id="B79">
<label>79</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zabolotna</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Volochnyuk</surname> <given-names>DM</given-names>
</name>
<name>
<surname>Ryabukhin</surname> <given-names>SV</given-names>
</name>
<name>
<surname>Gavrylenko</surname> <given-names>K</given-names>
</name>
<name>
<surname>Horvath</surname> <given-names>D</given-names>
</name>
<name>
<surname>Klimchuk</surname> <given-names>O</given-names>
</name>
<etal/>
</person-group>. <article-title>SynthI: A new open-source tool for synthon-based library design</article-title>. <source>J Chem Inf Model</source> (<year>2022</year>) <volume>62</volume>:<page-range>2151&#x2013;63</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.1c00754</pub-id>
</citation>
</ref>
<ref id="B80">
<label>80</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoffmann</surname> <given-names>T</given-names>
</name>
<name>
<surname>Gastreich</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>The next level in chemical space navigation: going far beyond enumerable compound libraries</article-title>. <source>Drug Discovery Today</source> (<year>2019</year>) <volume>24</volume>:<page-range>1148&#x2013;56</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.drudis.2019.02.013</pub-id>
</citation>
</ref>
<ref id="B81">
<label>81</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alnammi</surname> <given-names>M</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S</given-names>
</name>
<name>
<surname>Ericksen</surname> <given-names>SS</given-names>
</name>
<name>
<surname>Ananiev</surname> <given-names>GE</given-names>
</name>
<name>
<surname>Voter</surname> <given-names>AF</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Evaluating scalable supervised learning for synthesize-on-demand chemical libraries</article-title>. <source>J Chem Inf Model</source> (<year>2023</year>) <volume>63</volume>(<issue>17</issue>):<page-range>5513&#x2013;28</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.3c00912</pub-id>
</citation>
</ref>
<ref id="B82">
<label>82</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leach</surname> <given-names>AG</given-names>
</name>
<name>
<surname>Jones</surname> <given-names>HD</given-names>
</name>
<name>
<surname>Cosgrove</surname> <given-names>DA</given-names>
</name>
<name>
<surname>Kenny</surname> <given-names>PW</given-names>
</name>
<name>
<surname>Ruston</surname> <given-names>L</given-names>
</name>
<name>
<surname>MacFaul</surname> <given-names>P</given-names>
</name>
<etal/>
</person-group>. <article-title>Matched molecular pairs as a guide in the optimization of pharmaceutical properties; a study of aqueous solubility, plasma protein binding and oral exposure</article-title>. <source>J Med Chem</source> (<year>2006</year>) <volume>49</volume>:<page-range>6672&#x2013;82</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/jm0605233</pub-id>
</citation>
</ref>
<ref id="B83">
<label>83</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>S</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>L</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>A</given-names>
</name>
<name>
<surname>Hou</surname> <given-names>T</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>D</given-names>
</name>
</person-group>. <article-title>Matched molecular pair analysis in drug discovery: methods and recent applications</article-title>. <source>J Med Chem</source> (<year>2023</year>) <volume>66</volume>:<page-range>4361&#x2013;77</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.jmedchem.2c01787</pub-id>
</citation>
</ref>
<ref id="B84">
<label>84</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cucurull-Sanchez</surname> <given-names>L</given-names>
</name>
</person-group>. <article-title>Successful identification of key chemical structure modifications that lead to improved ADME profiles</article-title>. <source>J Comput Aided Mol Des</source> (<year>2010</year>) <volume>24</volume>:<page-range>449&#x2013;58</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10822-010-9361-5</pub-id>
</citation>
</ref>
<ref id="B85">
<label>85</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dossetter</surname> <given-names>AG</given-names>
</name>
<name>
<surname>Douglas</surname> <given-names>A</given-names>
</name>
<name>
<surname>O&#x2019;Donnell</surname> <given-names>C</given-names>
</name>
</person-group>. <article-title>A matched molecular pair analysis of <italic>in vitro</italic> human microsomal metabolic stability measurements for heterocyclic replacements of di-substituted benzene containing compounds &#x2013; identification of those isosteres more likely to have beneficial effects</article-title>. <source>Med Chem Commun</source> (<year>2012</year>) <volume>3</volume>:<fpage>1164</fpage>. doi: <pub-id pub-id-type="doi">10.1039/c2md20155k</pub-id>
</citation>
</ref>
<ref id="B86">
<label>86</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jensen</surname> <given-names>JH</given-names>
</name>
</person-group>. <article-title>A graph-based genetic algorithm and generative model/Monte Carlo tree search for the exploration of chemical space</article-title>. <source>Chem Sci</source> (<year>2019</year>) <volume>10</volume>:<page-range>3567&#x2013;72</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1039/c8sc05372c</pub-id>
</citation>
</ref>
<ref id="B87">
<label>87</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leguy</surname> <given-names>J</given-names>
</name>
<name>
<surname>Cauchy</surname> <given-names>T</given-names>
</name>
<name>
<surname>Glavatskikh</surname> <given-names>M</given-names>
</name>
<name>
<surname>Duval</surname> <given-names>B</given-names>
</name>
<name>
<surname>Da Mota</surname> <given-names>B</given-names>
</name>
</person-group>. <article-title>EvoMol: a flexible and interpretable evolutionary algorithm for unbiased <italic>de novo</italic> molecular generation</article-title>. <source>J Cheminform</source> (<year>2020</year>) <volume>12</volume>:<fpage>55</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13321-020-00458-z</pub-id>
</citation>
</ref>
<ref id="B88">
<label>88</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>J</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>D</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Uncertainty quantification: Can we trust artificial intelligence in drug discovery</article-title>? <source>iScience</source> (<year>2022</year>) <volume>25</volume>:<fpage>104814</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.isci.2022.104814</pub-id>
</citation>
</ref>
<ref id="B89">
<label>89</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van Tilborg</surname> <given-names>D</given-names>
</name>
<name>
<surname>Grisoni</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>Traversing chemical space with active deep learning</article-title>. <source>ChemRxiv [Preprint]</source> (<year>2023</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.26434/chemrxiv-2023-wgl32</pub-id>
</citation>
</ref>
<ref id="B90">
<label>90</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reker</surname> <given-names>D</given-names>
</name>
</person-group>. <article-title>Practical considerations for active machine learning in drug discovery</article-title>. <source>Drug Discovery Today: Technol</source> (<year>2019</year>) <volume>32-33</volume>:<page-range>73&#x2013;9</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.ddtec.2020.06.001</pub-id>
</citation>
</ref>
<ref id="B91">
<label>91</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reker</surname> <given-names>D</given-names>
</name>
<name>
<surname>Schneider</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title>Active-learning strategies in computer-assisted drug discovery</article-title>. <source>Drug Discovery Today</source> (<year>2015</year>) <volume>20</volume>:<page-range>458&#x2013;65</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.drudis.2014.12.004</pub-id>
</citation>
</ref>
<ref id="B92">
<label>92</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lipinski</surname> <given-names>CA</given-names>
</name>
</person-group>. <article-title>Rule of five in 2015 and beyond: Target and ligand structural limitations, ligand chemistry structure and drug discovery project decisions</article-title>. <source>Adv Drug Deliv Rev</source> (<year>2016</year>) <volume>101</volume>:<fpage>34</fpage>&#x2013;<lpage>41</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.addr.2016.04.029</pub-id>
</citation>
</ref>
<ref id="B93">
<label>93</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bickerton</surname> <given-names>GR</given-names>
</name>
<name>
<surname>Paolini</surname> <given-names>GV</given-names>
</name>
<name>
<surname>Besnard</surname> <given-names>J</given-names>
</name>
<name>
<surname>Muresan</surname> <given-names>S</given-names>
</name>
<name>
<surname>Hopkins</surname> <given-names>AL</given-names>
</name>
</person-group>. <article-title>Quantifying the chemical beauty of drugs</article-title>. <source>Nat Chem</source> (<year>2012</year>) <volume>4</volume>:<page-range>90&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nchem.1243</pub-id>
</citation>
</ref>
<ref id="B94">
<label>94</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baell</surname> <given-names>JB</given-names>
</name>
<name>
<surname>Holloway</surname> <given-names>GA</given-names>
</name>
</person-group>. <article-title>New substructure filters for removal of pan assay interference compounds (PAINS) from screening libraries and for their exclusion in bioassays</article-title>. <source>J Med Chem</source> (<year>2010</year>) <volume>53</volume>:<page-range>2719&#x2013;40</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/jm901137j</pub-id>
</citation>
</ref>
<ref id="B95">
<label>95</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brenk</surname> <given-names>R</given-names>
</name>
<name>
<surname>Schipani</surname> <given-names>A</given-names>
</name>
<name>
<surname>James</surname> <given-names>D</given-names>
</name>
<name>
<surname>Krasowski</surname> <given-names>A</given-names>
</name>
<name>
<surname>Gilbert</surname> <given-names>I</given-names>
</name>
<name>
<surname>Frearson</surname> <given-names>J</given-names>
</name>
<etal/>
</person-group>. <article-title>Lessons learnt from assembling screening libraries for drug discovery for neglected diseases</article-title>. <source>ChemMedChem</source> (<year>2008</year>) <volume>3</volume>:<page-range>435&#x2013;44</page-range>. doi: <pub-id pub-id-type="doi">10.1002/cmdc.200700139</pub-id>
</citation>
</ref>
<ref id="B96">
<label>96</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ertl</surname> <given-names>P</given-names>
</name>
<name>
<surname>Schuffenhauer</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Estimation of synthetic accessibility score of drug-like molecules based on molecular complexity and fragment contributions</article-title>. <source>J Cheminform</source> (<year>2009</year>) <volume>1</volume>:<fpage>8</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1758-2946-1-8</pub-id>
</citation>
</ref>
<ref id="B97">
<label>97</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>W</given-names>
</name>
<name>
<surname>Coley</surname> <given-names>CW</given-names>
</name>
</person-group>. <article-title>The synthesizability of molecules proposed by generative models</article-title>. <source>J Chem Inf Model</source> (<year>2020</year>) <volume>60</volume>:<page-range>5714&#x2013;23</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.0c00174</pub-id>
</citation>
</ref>
<ref id="B98">
<label>98</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Coley</surname> <given-names>CW</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>DA</given-names>
</name>
<name>
<surname>Lummiss</surname> <given-names>JAM</given-names>
</name>
<name>
<surname>Jaworski</surname> <given-names>JN</given-names>
</name>
<name>
<surname>Breen</surname> <given-names>CP</given-names>
</name>
<name>
<surname>Schultz</surname> <given-names>V</given-names>
</name>
<etal/>
</person-group>. <article-title>A robotic platform for flow synthesis of organic compounds informed by AI planning</article-title>. <source>Science</source> (<year>2019</year>) <volume>365</volume>:<elocation-id>eaax1566</elocation-id>. doi: <pub-id pub-id-type="doi">10.1126/science.aax1566</pub-id>
</citation>
</ref>
<ref id="B99">
<label>99</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Genheden</surname> <given-names>S</given-names>
</name>
<name>
<surname>Thakkar</surname> <given-names>A</given-names>
</name>
<name>
<surname>Chadimov&#xe1;</surname> <given-names>V</given-names>
</name>
<name>
<surname>Reymond</surname> <given-names>J-L</given-names>
</name>
<name>
<surname>Engkvist</surname> <given-names>O</given-names>
</name>
<name>
<surname>Bjerrum</surname> <given-names>E</given-names>
</name>
</person-group>. <article-title>AiZynthFinder: a fast, robust and flexible open-source software for retrosynthetic planning</article-title>. <source>J Cheminform</source> (<year>2020</year>) <volume>12</volume>:<fpage>70</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13321-020-00472-1</pub-id>
</citation>
</ref>
<ref id="B100">
<label>100</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>Z-Y</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Z-J</given-names>
</name>
<name>
<surname>He</surname> <given-names>J-H</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>A-P</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S</given-names>
</name>
<name>
<surname>Hou</surname> <given-names>T-J</given-names>
</name>
<etal/>
</person-group>. <article-title>Benchmarking the mechanisms of frequent hitters: limitation of PAINS alerts</article-title>. <source>Drug Discovery Today</source> (<year>2021</year>) <volume>26</volume>:<page-range>1353&#x2013;8</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.drudis.2021.02.003</pub-id>
</citation>
</ref>
<ref id="B101">
<label>101</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Toropov</surname> <given-names>AA</given-names>
</name>
<name>
<surname>Toropova</surname> <given-names>AP</given-names>
</name>
</person-group>. <article-title>QSPR/QSAR: state-of-art, weirdness, the future</article-title>. <source>Molecules</source> (<year>2020</year>) <volume>25</volume>:<fpage>1292</fpage>. doi: <pub-id pub-id-type="doi">10.3390/molecules25061292</pub-id>
</citation>
</ref>
<ref id="B102">
<label>102</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ballabio</surname> <given-names>D</given-names>
</name>
<name>
<surname>Grisoni</surname> <given-names>F</given-names>
</name>
<name>
<surname>Consonni</surname> <given-names>V</given-names>
</name>
<name>
<surname>Todeschini</surname> <given-names>R</given-names>
</name>
</person-group>. <article-title>Integrated QSAR models to predict acute oral systemic toxicity</article-title>. <source>Mol Inf</source> (<year>2019</year>) <volume>38</volume>:<fpage>1800124</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/minf.201800124</pub-id>
</citation>
</ref>
<ref id="B103">
<label>103</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Palmeira</surname> <given-names>A</given-names>
</name>
<name>
<surname>Rodrigues</surname> <given-names>F</given-names>
</name>
<name>
<surname>Sousa</surname> <given-names>E</given-names>
</name>
<name>
<surname>Pinto</surname> <given-names>M</given-names>
</name>
<name>
<surname>Vasconcelos</surname> <given-names>MH</given-names>
</name>
<name>
<surname>Fernandes</surname> <given-names>MX</given-names>
</name>
</person-group>. <article-title>New uses for old drugs: pharmacophore-based screening for the discovery of P-glycoprotein inhibitors</article-title>. <source>Chem Biol Drug Design</source> (<year>2011</year>) <volume>78</volume>:<fpage>57</fpage>&#x2013;<lpage>72</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1747-0285.2011.01089.x</pub-id>
</citation>
</ref>
<ref id="B104">
<label>104</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mousa</surname> <given-names>LA</given-names>
</name>
<name>
<surname>Hatmal</surname> <given-names>MM</given-names>
</name>
<name>
<surname>Taha</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Exploiting activity cliffs for building pharmacophore models and comparison with other pharmacophore generation methods: sphingosine kinase 1 as case study</article-title>. <source>J Comput Aided Mol Des</source> (<year>2022</year>) <volume>36</volume>:<fpage>39</fpage>&#x2013;<lpage>62</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10822-021-00435-0</pub-id>
</citation>
</ref>
<ref id="B105">
<label>105</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Riniker</surname> <given-names>S</given-names>
</name>
<name>
<surname>Landrum</surname> <given-names>GA</given-names>
</name>
</person-group>. <article-title>Open-source platform to benchmark fingerprints for ligand-based virtual screening</article-title>. <source>J Cheminform</source> (<year>2013</year>) <volume>5</volume>:<fpage>26</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1758-2946-5-26</pub-id>
</citation>
</ref>
<ref id="B106">
<label>106</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kuhlman</surname> <given-names>B</given-names>
</name>
<name>
<surname>Bradley</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>Advances in protein structure prediction and design</article-title>. <source>Nat Rev Mol Cell Biol</source> (<year>2019</year>) <volume>20</volume>:<page-range>681&#x2013;97</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41580-019-0163-x</pub-id>
</citation>
</ref>
<ref id="B107">
<label>107</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jumper</surname> <given-names>J</given-names>
</name>
<name>
<surname>Evans</surname> <given-names>R</given-names>
</name>
<name>
<surname>Pritzel</surname> <given-names>A</given-names>
</name>
<name>
<surname>Green</surname> <given-names>T</given-names>
</name>
<name>
<surname>Figurnov</surname> <given-names>M</given-names>
</name>
<name>
<surname>Ronneberger</surname> <given-names>O</given-names>
</name>
<etal/>
</person-group>. <article-title>Highly accurate protein structure prediction with AlphaFold</article-title>. <source>Nature</source> (<year>2021</year>) <volume>596</volume>:<page-range>583&#x2013;9</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s41586-021-03819-2</pub-id>
</citation>
</ref>
<ref id="B108">
<label>108</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Caballero</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>The latest automated docking technologies for novel drug discovery</article-title>. <source>Expert Opin Drug Discov</source> (<year>2021</year>) <volume>16</volume>:<page-range>625&#x2013;45</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/17460441.2021.1858793</pub-id>
</citation>
</ref>
<ref id="B109">
<label>109</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Desaphy</surname> <given-names>J</given-names>
</name>
<name>
<surname>Raimbaud</surname> <given-names>E</given-names>
</name>
<name>
<surname>Ducrot</surname> <given-names>P</given-names>
</name>
<name>
<surname>Rognan</surname> <given-names>D</given-names>
</name>
</person-group>. <article-title>Encoding protein&#x2013;ligand interaction patterns in fingerprints and graphs</article-title>. <source>J Chem Inf Model</source> (<year>2013</year>) <volume>53</volume>:<page-range>623&#x2013;37</page-range>. doi: <pub-id pub-id-type="doi">10.1021/ci300566n</pub-id>
</citation>
</ref>
<ref id="B110">
<label>110</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Renner</surname> <given-names>S</given-names>
</name>
<name>
<surname>Derksen</surname> <given-names>S</given-names>
</name>
<name>
<surname>Radestock</surname> <given-names>S</given-names>
</name>
<name>
<surname>M&#xf6;rchen</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>Maximum common binding modes (MCBM): Consensus docking scoring using multiple ligand information and interaction fingerprints</article-title>. <source>J Chem Inf Model</source> (<year>2008</year>) <volume>48</volume>:<page-range>319&#x2013;32</page-range>. doi: <pub-id pub-id-type="doi">10.1021/ci7003626</pub-id>
</citation>
</ref>
<ref id="B111">
<label>111</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yasuo</surname> <given-names>N</given-names>
</name>
<name>
<surname>Sekijima</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Improved method of structure-based virtual screening via interaction-energy-based learning</article-title>. <source>J Chem Inf Model</source> (<year>2019</year>) <volume>59</volume>:<page-range>1050&#x2013;61</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.8b00673</pub-id>
</citation>
</ref>
<ref id="B112">
<label>112</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Braun</surname> <given-names>E</given-names>
</name>
<name>
<surname>Gilmer</surname> <given-names>J</given-names>
</name>
<name>
<surname>Mayes</surname> <given-names>HB</given-names>
</name>
<name>
<surname>Mobley</surname> <given-names>DL</given-names>
</name>
<name>
<surname>Monroe</surname> <given-names>JI</given-names>
</name>
<name>
<surname>Prasad</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Best practices for foundations in molecular simulations [Article v1.0]</article-title>. <source>Living J Comput Mol Sci</source> (<year>2019</year>) <volume>1</volume>:<fpage>5957</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.33011/livecoms.1.1.5957</pub-id>
</citation>
</ref>
<ref id="B113">
<label>113</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wade</surname> <given-names>AD</given-names>
</name>
<name>
<surname>Bhati</surname> <given-names>AP</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>S</given-names>
</name>
<name>
<surname>Coveney</surname> <given-names>PV</given-names>
</name>
</person-group>. <article-title>Alchemical free energy estimators and molecular dynamics engines: accuracy, precision, and reproducibility</article-title>. <source>J Chem Theory Comput</source> (<year>2022</year>) <volume>18</volume>:<page-range>3972&#x2013;87</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jctc.2c00114</pub-id>
</citation>
</ref>
<ref id="B114">
<label>114</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Blay</surname> <given-names>V</given-names>
</name>
<name>
<surname>Tolani</surname> <given-names>B</given-names>
</name>
<name>
<surname>Ho</surname> <given-names>SP</given-names>
</name>
<name>
<surname>Arkin</surname> <given-names>MR</given-names>
</name>
</person-group>. <article-title>High-throughput screening: today&#x2019;s biochemical and cell-based approaches</article-title>. <source>Drug Discovery Today</source> (<year>2020</year>) <volume>25</volume>:<page-range>1807&#x2013;21</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.drudis.2020.07.024</pub-id>
</citation>
</ref>
<ref id="B115">
<label>115</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Renaud</surname> <given-names>J-P</given-names>
</name>
<name>
<surname>Chung</surname> <given-names>C</given-names>
</name>
<name>
<surname>Danielson</surname> <given-names>UH</given-names>
</name>
<name>
<surname>Egner</surname> <given-names>U</given-names>
</name>
<name>
<surname>Hennig</surname> <given-names>M</given-names>
</name>
<name>
<surname>Hubbard</surname> <given-names>RE</given-names>
</name>
<etal/>
</person-group>. <article-title>Biophysics in drug discovery: impact, challenges and opportunities</article-title>. <source>Nat Rev Drug Discov</source> (<year>2016</year>) <volume>15</volume>:<page-range>679&#x2013;98</page-range>. doi: <pub-id pub-id-type="doi">10.1038/nrd.2016.123</pub-id>
</citation>
</ref>
<ref id="B116">
<label>116</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brown</surname> <given-names>N</given-names>
</name>
<name>
<surname>Fiscato</surname> <given-names>M</given-names>
</name>
<name>
<surname>Segler</surname> <given-names>MHS</given-names>
</name>
<name>
<surname>Vaucher</surname> <given-names>AC</given-names>
</name>
</person-group>. <article-title>GuacaMol: benchmarking models for <italic>de novo</italic> molecular design</article-title>. <source>J Chem Inf Model</source> (<year>2019</year>) <volume>59</volume>:<page-range>1096&#x2013;108</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.8b00839</pub-id>
</citation>
</ref>
<ref id="B117">
<label>117</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Preuer</surname> <given-names>K</given-names>
</name>
<name>
<surname>Renz</surname> <given-names>P</given-names>
</name>
<name>
<surname>Unterthiner</surname> <given-names>T</given-names>
</name>
<name>
<surname>Hochreiter</surname> <given-names>S</given-names>
</name>
<name>
<surname>Klambauer</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title>Fr&#xe9;chet ChemNet distance: A metric for generative models for molecules in drug discovery</article-title>. <source>J Chem Inf Model</source> (<year>2018</year>) <volume>58</volume>:<page-range>1736&#x2013;41</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.8b00234</pub-id>
</citation>
</ref>
<ref id="B118">
<label>118</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Woodward</surname> <given-names>DJ</given-names>
</name>
<name>
<surname>Bradley</surname> <given-names>AR</given-names>
</name>
<name>
<surname>van Hoorn</surname> <given-names>WP</given-names>
</name>
</person-group>. <article-title>Coverage score: A model agnostic method to efficiently explore chemical space</article-title>. <source>J Chem Inf Model</source> (<year>2022</year>) <volume>62</volume>(<issue>12</issue>):<page-range>4391&#x2013;402</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.2c00258</pub-id>
</citation>
</ref>
<ref id="B119">
<label>119</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Xie</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>J</given-names>
</name>
<name>
<surname>Mei</surname> <given-names>Q</given-names>
</name>
</person-group>. <source>How much space has been explored? Measuring the chemical space covered by databases and machine-generated molecules</source>. [preprint]. (<year>2023</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2112.12542</pub-id>.</citation>
</ref>
<ref id="B120">
<label>120</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Polykovskiy</surname> <given-names>D</given-names>
</name>
<name>
<surname>Zhebrak</surname> <given-names>A</given-names>
</name>
<name>
<surname>Sanchez-Lengeling</surname> <given-names>B</given-names>
</name>
<name>
<surname>Golovanov</surname> <given-names>S</given-names>
</name>
<name>
<surname>Tatanov</surname> <given-names>O</given-names>
</name>
<name>
<surname>Belyaev</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Molecular sets (MOSES): A benchmarking platform for molecular generation models</article-title>. <source>Front Pharmacol</source> (<year>2020</year>) <volume>11</volume>:<elocation-id>565644</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fphar.2020.565644</pub-id>
</citation>
</ref>
<ref id="B121">
<label>121</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Cieplinski</surname> <given-names>T</given-names>
</name>
<name>
<surname>Danel</surname> <given-names>T</given-names>
</name>
<name>
<surname>Podlewska</surname> <given-names>S</given-names>
</name>
<name>
<surname>Jastrzebski</surname> <given-names>S</given-names>
</name>
</person-group>. <source>We Should at Least Be Able to Design Molecules That Dock Well</source> (<year>2021</year>). Available at: <uri xlink:href="http://arxiv.org/abs/2006.16955">http://arxiv.org/abs/2006.16955</uri> (Accessed <access-date>July 8, 2022</access-date>).</citation>
</ref>
<ref id="B122">
<label>122</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ciepli&#x144;ski</surname> <given-names>T</given-names>
</name>
<name>
<surname>Danel</surname> <given-names>T</given-names>
</name>
<name>
<surname>Podlewska</surname> <given-names>S</given-names>
</name>
<name>
<surname>Jastrz&#x119;bski</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Generative models should at least be able to design molecules that dock well: A new benchmark</article-title>. <source>J Chem Inf Model</source> (<year>2023</year>) <volume>63</volume>(<issue>11</issue>):<page-range>3238&#x2013;47</page-range>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.2c01355</pub-id>
</citation>
</ref>
<ref id="B123">
<label>123</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Merk</surname> <given-names>D</given-names>
</name>
<name>
<surname>Friedrich</surname> <given-names>L</given-names>
</name>
<name>
<surname>Grisoni</surname> <given-names>F</given-names>
</name>
<name>
<surname>Schneider</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title><italic>De novo</italic> design of bioactive small molecules by artificial intelligence</article-title>. <source>Mol. Inf</source> (<year>2018</year>) <volume>37</volume>:<fpage>1700153</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/minf.201700153</pub-id>
</citation>
</ref>
<ref id="B124">
<label>124</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stanley</surname> <given-names>M</given-names>
</name>
<name>
<surname>Segler</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Fake it until you make it? Generative <italic>de novo</italic> design and virtual screening of synthesizable molecules</article-title>. <source>Curr Opin Struct Biol</source> (<year>2023</year>) <volume>82</volume>:<fpage>102658</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.sbi.2023.102658</pub-id>
</citation>
</ref>
<ref id="B125">
<label>125</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jang</surname> <given-names>SH</given-names>
</name>
<name>
<surname>Sivakumar</surname> <given-names>D</given-names>
</name>
<name>
<surname>Mudedla</surname> <given-names>SK</given-names>
</name>
<name>
<surname>Choi</surname> <given-names>J</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>S</given-names>
</name>
<name>
<surname>Jeon</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>PCW-A1001, AI-assisted <italic>de novo</italic> design approach to design a selective inhibitor for FLT-3(D835Y) in acute myeloid leukemia</article-title>. <source>Front Mol Biosci</source> (<year>2022</year>) <volume>9</volume>:<elocation-id>1072028</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fmolb.2022.1072028</pub-id>
</citation>
</ref>
<ref id="B126">
<label>126</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lee</surname> <given-names>S</given-names>
</name>
<name>
<surname>Jo</surname> <given-names>J</given-names>
</name>
<name>
<surname>Hwang</surname> <given-names>SJ</given-names>
</name>
</person-group>. <source>Exploring chemical space with score-based out-of-distribution generation</source>. [preprint]. (<year>2022</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2206.07632</pub-id>
</citation>
</ref>
<ref id="B127">
<label>127</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ivanenkov</surname> <given-names>YA</given-names>
</name>
<name>
<surname>Polykovskiy</surname> <given-names>D</given-names>
</name>
<name>
<surname>Bezrukov</surname> <given-names>D</given-names>
</name>
<name>
<surname>Zagribelnyy</surname> <given-names>B</given-names>
</name>
<name>
<surname>Aladinskiy</surname> <given-names>V</given-names>
</name>
<name>
<surname>Kamya</surname> <given-names>P</given-names>
</name>
<etal/>
</person-group>. <article-title>Chemistry42: an AI-driven platform for molecular design and optimization</article-title>. <source>J Chem Inf Model</source> (<year>2023</year>) <volume>63</volume>:<fpage>695</fpage>&#x2013;<lpage>701</lpage>. doi: <pub-id pub-id-type="doi">10.1021/acs.jcim.2c01191</pub-id>
</citation>
</ref>
<ref id="B128">
<label>128</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bleicher</surname> <given-names>LS</given-names>
</name>
<name>
<surname>Van Daelen</surname> <given-names>T</given-names>
</name>
<name>
<surname>Honeycutt</surname> <given-names>JD</given-names>
</name>
<name>
<surname>Hassan</surname> <given-names>M</given-names>
</name>
<name>
<surname>Chandrasekhar</surname> <given-names>J</given-names>
</name>
<name>
<surname>Shirley</surname> <given-names>W</given-names>
</name>
<etal/>
</person-group>. <article-title>Enhanced utility of AI/ML methods during lead optimization by inclusion of 3D ligand information</article-title>. <source>Front. Drug Discov</source> (<year>2022</year>) <volume>2</volume>:<elocation-id>1074797</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fddsv.2022.1074797</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>
