<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Chem.</journal-id>
<journal-title>Frontiers in Chemistry</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Chem.</abbrev-journal-title>
<issn pub-type="epub">2296-2646</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">800133</article-id>
<article-id pub-id-type="doi">10.3389/fchem.2021.800133</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Chemistry</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Computational Discovery of TTF Molecules with Deep Generative Models</article-title>
<alt-title alt-title-type="left-running-head">Yakubovich et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">Discovery of TTF Molecules</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Yakubovich</surname>
<given-names>Alexander</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">
<sup>&#x2a;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1527618/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Odinokov</surname>
<given-names>Alexey</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1553309/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nikolenko</surname>
<given-names>Sergey</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/31995/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Jung</surname>
<given-names>Yongsik</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">
<sup>&#x2a;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1561060/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Choi</surname>
<given-names>Hyeonho</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1541144/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Samsung R&#x0026;D Institute Russia (SRR), Samsung Electronics</institution>, <addr-line>Moscow</addr-line>, <country>Russia</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Steklov Institute of Mathematics at Saint Petersburg</institution>, <addr-line>Saint Petersburg</addr-line>, <country>Russia</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>ISP RAS Research Center for Trusted Artificial Intelligence</institution>, <addr-line>Moscow</addr-line>, <country>Russia</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Samsung Advanced Institute of Technology (SAIT), Samsung Electronics</institution>, <addr-line>Yeongtong-gu</addr-line>, <country>South Korea</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1263081/overview">Paul Winget</ext-link>, Schr&#xf6;dinger, United&#x20;States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/653186/overview">Pedro Henrique De Oliveira Neto</ext-link>, University of Brasilia, Brazil</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/701133/overview">Ablikim Obolda</ext-link>, Xinjiang Agricultural University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Alexander Yakubovich, <email>a.yakubovich@samsung.com</email>; Yongsik Jung, <email>ys327.jung@samsung.com</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Physical Chemistry and Chemical Physics, a section of the journal Frontiers in Chemistry</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>12</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>9</volume>
<elocation-id>800133</elocation-id>
<history>
<date date-type="received">
<day>22</day>
<month>10</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>11</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2021 Yakubovich, Odinokov, Nikolenko, Jung and Choi.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Yakubovich, Odinokov, Nikolenko, Jung and Choi</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<abstract>
<p>We present a computational workflow based on quantum chemical calculations and generative models based on deep neural networks for the discovery of novel materials. We apply the developed workflow to search for molecules suitable for the fusion of triplet-triplet excitations (triplet-triplet fusion, TTF) in blue OLED devices. By applying generative machine learning models, we have been able to pinpoint the most promising regions of the chemical space for further exploration. Another neural network based on graph convolutions was trained to predict excitation energies; with this network, we estimate the alignment of energy levels and filter molecules before running time-consuming quantum chemical calculations. We present a comprehensive computational evaluation of several generative models, choosing a modification of the Junction Tree VAE (JT-VAE) as the best one in this application. The proposed approach can be useful for computer-aided design of materials with energy level alignment favorable for efficient energy transfer, triplet harvesting, and exciton fusion processes, which are crucial for the development of the next generation OLED materials.</p>
</abstract>
<kwd-group>
<kwd>generative model</kwd>
<kwd>OLED</kwd>
<kwd>organic light emitting devices/display</kwd>
<kwd>computational materials discovery</kwd>
<kwd>quantum chemistry</kwd>
<kwd>autoencoder</kwd>
<kwd>molecular database screening</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Operation of organic light-emitting and photovoltaic devices can be greatly improved by utilizing the triplet&#x2013;triplet fusion (TTF) process, in which two triplet excitons of low energy merge into one singlet exciton of higher energy (<xref ref-type="bibr" rid="B12">Gray et&#x20;al., 2014</xref>). Despite some successes in the discovery of TTF materials (<xref ref-type="bibr" rid="B24">Kondakov, 2015</xref>), their number is still limited, the main reason being strict requirements on the alignment of the lowest singlet and triplet energy levels that are difficult to satisfy by randomly picking a compound (<xref ref-type="bibr" rid="B8">G&#xf3;mez-Bombarelli et&#x20;al., 2016</xref>; <xref ref-type="bibr" rid="B44">Wang et&#x20;al., 2020</xref>).</p>
<p>Compounds with TTF activity often contain a &#x201c;core&#x201d;, a fused heterocyclic fragment responsible for their basic properties, as shown in <xref ref-type="fig" rid="F1">Figure&#x20;1</xref>; then redox potentials and excitation energies of the compound can be further modified by adding side groups. A convenient way of designing new materials is to start from an already known prototype and modulate its properties by varying functional groups. Moreover, any core requires certain chemical modifications to become a real-life TTF material due to technology-related reasons; these modifications may be needed to increase solubility, prevent undesired aggregation, or reduce photochemical degradation. In such cases, one should be careful not to spoil a promising core by inappropriate substitutions. Moreover, another strict constraint appears in the case of deep blue OLED emitters, namely high singlet excitation energy <italic>S</italic>
<sub>1</sub>, which makes it extremely difficult to perform concise chemical modifications. Under these circumstances, it becomes especially important to find new original cores with favorable arrangements of energy levels. An efficient search strategy should be able to sample the space of functionalized heterocyclic compounds and suggest candidates based both on the core structure and nature of the side groups.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Typical TTF materials, tetra-tert-butylperylene (<xref ref-type="bibr" rid="B33">Ravetz et&#x20;al., 2019</xref>) and rubrene (<xref ref-type="bibr" rid="B3">Cheng et&#x20;al., 2010</xref>), and the scheme of energy levels favorable for the TTF process. Cores of the compounds are highlighted in red. For efficient TTF, energy differences <italic>&#x3b4;</italic>
<sub>
<italic>b</italic>
</sub> and <italic>&#x3b4;</italic>
<sub>
<italic>c</italic>
</sub> should be positive.</p>
</caption>
<graphic xlink:href="fchem-09-800133-g001.tif"/>
</fig>
<p>In this work, we demonstrate a general and computationally efficient approach for the search for novel TTF materials. The approach is based on three steps. First, we generate all possible polycyclic molecular graphs within predefined limits and then decorate them with heteroatoms and side groups, allowing for dense coverage of large regions in the chemical space. Second, we apply a fast semiempirical (SE) method to calculate low-lying singlet and triplet energy levels, allowing for high-throughput screening of molecular databases. Third, we use a generative machine learning (ML) model based on deep neural networks to suggest new compounds with the distribution of generated molecules biased towards blue TTF emitters. In particular, we compare several different generative models and choose the best one for further discovery of leads in a larger chemical&#x20;space.</p>
<p>Modification of cores with side groups greatly expands the considered areas of chemical space and makes it necessary to apply ML-based models to perform guided search for promising candidates. For a comprehensive validation of ML-based models, we have conducted a complete screening of a subset of the relevant &#x201c;core&#x201d; chemical space, which has allowed us to choose the best deep generative model for the task. Design of novel TTF materials is based on a multi-step workflow that begins with the generation of training datasets and repeatedly provides lead compounds in a batch-wise manner, aiming to provide leads for further expert-based selection and experimental trials.</p>
</sec>
<sec id="s2">
<title>2 Methods</title>
<sec id="s2-1">
<title>2.1 Target Properties of TTF Candidates</title>
<p>The triplet-triplet fusion process occurs when two <italic>T</italic>
<sub>1</sub> excitations transform into one excited singlet state. To ensure high internal conversion efficiency, it is important to suppress the formation of higher triplet states. Therefore, a criterion for a molecule to be an appropriate candidate for the design of a TTF material is usually expressed in terms of the lowest singlet (<italic>S</italic>
<sub>1</sub>) and two lowest triplet (<italic>T</italic>
<sub>1</sub> and <italic>T</italic>
<sub>2</sub>) energy levels as follows:<disp-formula id="e1">
<mml:math id="m1">
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3e;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="2em"/>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>where the first inequality ensures that there is enough energy in two triplet states to form a singlet excitation, and the second inequality prohibits the formation of higher excited triplet states, thus favoring only singlet excitation formation. Large splitting between <italic>S</italic>
<sub>1</sub> and <italic>T</italic>
<sub>1</sub> implies that both states originate from <italic>&#x3c0;&#x3c0;</italic>&#x2a; excitations. Under this assumption, it makes sense to focus the methodology on accurate prediction of <italic>&#x3c0;&#x3c0;</italic>&#x2a; states, and tolerate lower performance for <italic>n&#x3c0;</italic>&#x2a; states. For example, <italic>&#x3c0;&#x3c0;</italic>&#x2a; excitations are relatively unaffected by the solvent polarity, so vacuum calculations should be sufficient and allow for faster computations.</p>
<p>The present work is focused on blue OLED light-emitting materials that require a certain threshold for <italic>S</italic>
<sub>1</sub> energy. In particular, we can define three numerical criteria to filter compounds appropriate for TTF applications as follows:<disp-formula id="e2">
<mml:math id="m2">
<mml:mtable class="eqnarray">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3e;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3e;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3e;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(2)</label>
</disp-formula>where numerical values of the threshold parameters <italic>&#x3b4;</italic>
<sub>
<italic>a</italic>
</sub>, <italic>&#x3b4;</italic>
<sub>
<italic>b</italic>
</sub>, and <italic>&#x3b4;</italic>
<sub>
<italic>c</italic>
</sub> can be adjusted to find a better tradeoff between the number and quality of final candidates. In the ideal case, we should set <italic>&#x3b4;</italic>
<sub>
<italic>a</italic>
</sub> &#x3d; 2.8&#xa0;eV, <italic>&#x3b4;</italic>
<sub>
<italic>b</italic>
</sub> &#x3d; <italic>&#x3b4;</italic>
<sub>
<italic>c</italic>
</sub> &#x3d; 0&#xa0;eV, but in practice we use less restrictive values to allow for intrinsic inaccuracies of simulation approaches and finite width of excitation energy levels of the molecules in the OLED emission&#x20;layer.</p>
</sec>
<sec id="s2-2">
<title>2.2 Algorithm for the Generation of Molecular Topology</title>
<p>In this section, we present our algorithm for the generation of molecular structures. It includes several consecutive steps, illustrated in <xref ref-type="fig" rid="F2">Figure&#x20;2</xref> with Roman numerals. The procedure can be subdivided into two parts. First, we generate a skeleton frame, that is, a graph of connected points, that does not yet specify the atomic types or bond orders (steps I-III in <xref ref-type="fig" rid="F2">Figure&#x20;2</xref>). Second, the frame needs to be populated with heteroatoms, double bonds, and side groups that correspond to the correct Kekul&#xe9; structure of a specific molecule (steps IV-VI in <xref ref-type="fig" rid="F2">Figure&#x20;2</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Algorithm for the generation of molecular structures. Numbers show the actual number of descendants that can be obtained from a given structure (not all of them are shown in the figure). Blue markers indicate two neighboring sites chosen for ring fusion. Sites with green markers are added automatically to the new&#x20;ring.</p>
</caption>
<graphic xlink:href="fchem-09-800133-g002.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="F2">Figure&#x20;2</xref> presents a branch of the structure generation tree that starts from a single frame made of three 6-membered rings (step I). On step II we need to mark the places where a new ring can be attached. All possible pairs of connected atoms located on the perimeter of the frame should be considered. On step III, 5- and 6-membered rings are attached to the marked places. Duplicates already stored in the database are removed, and some simple heuristics are applied to filter out structures with steric hindrances. After processing all frames consisting of <italic>N</italic> rings, we obtain the next &#x201c;generation&#x201d; of frames consisting of <italic>N</italic>&#x20;&#x2b; 1&#x20;rings.</p>
<p>On step IV, heteroatoms are placed in the frame according to the following rules: at most 4 heteroatoms in the frame; only nitrogen and oxygen are considered; an even number of <italic>&#x3c0;</italic> electrons is required; no pair of heteroatoms can be connected by a covalent bond except for two nitrogens. The last rule is introduced to exclude extremely exotic compounds with peculiar distributions of heteroatoms in the molecule that <italic>a priori</italic> have very little chance of being synthesizable and photostable.</p>
<p>If the resulting labeled graph can be successfully kekulized with <italic>openbabel</italic> (<xref ref-type="bibr" rid="B27">O&#x2019;Boyle et&#x20;al., 2011</xref>), it is considered to be a valid molecule, and its SMILES string is stored in the database. Again, all duplicates are removed. The resulting molecules comprise the set of &#x201c;cores&#x201d;, compact polycyclic fragments without side groups that can serve as building blocks to make more complex TTF materials. The chemical space of cores made of 4 or fewer rings consists of 472,505 structures, and all of them can be enumerated with the above algorithm.</p>
<p>To make the resulting materials more likely to be applicable in real-life OLEDs, the cores should be modified further. First of all, the N-H bond in secondary amines should be capped with some residue because of low photochemical stability of the N-H bond. We replaced hydrogens with phenyls to simulate adjacent (presumably aromatic) parts of the complex TTF material (see step V in <xref ref-type="fig" rid="F2">Figure&#x20;2</xref>). On step VI, the required number of side groups can be placed at the given positions, leading to the final compound.</p>
</sec>
<sec id="s2-3">
<title>2.3 Experimental Data for Validation</title>
<p>We have collected data from the literature on well-resolved 0&#x2013;0 transitions of 55 polycyclic molecules with measured absorption or luminescence from the first singlet or triplet excited states (<xref ref-type="bibr" rid="B17">Halverson and Hirt, 1949</xref>; <xref ref-type="bibr" rid="B41">Sponer and Rush, 1949</xref>; <xref ref-type="bibr" rid="B16">Halverson and Hirt, 1951</xref>; <xref ref-type="bibr" rid="B18">Hirt et&#x20;al., 1954</xref>; <xref ref-type="bibr" rid="B6">Evans, 1957</xref>; <xref ref-type="bibr" rid="B19">Ito et&#x20;al., 1957</xref>; <xref ref-type="bibr" rid="B10">Goodman and Kasha, 1958</xref>; <xref ref-type="bibr" rid="B11">Goodman, 1961</xref>; <xref ref-type="bibr" rid="B40">Shimada, 1961</xref>; <xref ref-type="bibr" rid="B5">Dorr and Gropper, 1963</xref>; <xref ref-type="bibr" rid="B14">Gropper and Dorr, 1963</xref>; <xref ref-type="bibr" rid="B2">Burgos et&#x20;al., 1977</xref>; <xref ref-type="bibr" rid="B38">Schmidt, 1977</xref>; <xref ref-type="bibr" rid="B37">Schiedt and Weinkauf, 1997</xref>; <xref ref-type="bibr" rid="B34">Reineke and Baldo, 2014</xref>; <xref ref-type="bibr" rid="B28">Padula et&#x20;al., 2019</xref>). The structures are presented in <xref ref-type="sec" rid="s11">Supplementary Figure S2</xref>. The compounds can be clearly divided into three groups: pure aromatic hydrocarbons (composition HC), nitrogen-containing compounds (composition HCN) and oxygen-containing compounds (composition HCNO). We made no distinction between absorption and luminescence, since we used data on 0&#x2013;0 transitions. We also used experiments performed in different media: gas phase, non-polar solvents, rigid matrix or alcohols. Water and other highly polar solvents were not present. We compare this experimental data with calculations performed in the vacuum; this is a reasonable simplification due to the weak dependence of <italic>&#x3c0;&#x3c0;</italic>&#x2a; transitions on the solvent polarity. 
In any case, data points obtained in different solvents follow the same trend, and the number of experiments performed in every particular medium is too low for reliable statistical analysis. Under these approximations, we were able to collect a dataset suitable for the validation of the utilized computational approach.</p>
</sec>
<sec id="s2-4">
<title>2.4 Calculation of the Excitation Energies</title>
<p>Meeting growing needs of computational chemistry, various benchmark molecular datasets are being continuously created nowadays (<xref ref-type="bibr" rid="B46">Wu et&#x20;al., 2018</xref>). A typical dataset contains molecular structures and properties calculated using density functional theory (DFT). One of the most famous developments of this kind in the area of material science has been the Harvard Clean Energy Project (<xref ref-type="bibr" rid="B15">Hachmann et&#x20;al., 2011</xref>), spanning 2.3&#xa0;million candidate organic photovoltaic materials. However, most datasets do not provide spectral properties since the calculation of excited states using time-dependent DFT (TDDFT) is more time-consuming and often less reliable than the calculation of the ground state. On the other hand, datasets containing spectral properties are either not large enough (<xref ref-type="bibr" rid="B1">Abreha et&#x20;al., 2019</xref>) or have small overlap with compounds relevant for TTF applications (<xref ref-type="bibr" rid="B46">Wu et&#x20;al., 2018</xref>). This makes it necessary to prepare our own training dataset in order to search for candidate TTF compounds.</p>
<p>Since our generated structures amount to four hundred thousand compounds with more than 10 heavy atoms on average, the use of TDDFT to assess spectral properties is extremely computationally expensive. We estimate that TDDFT computations for a dataset of 0.5 million TTF molecules would require more than 100&#x20;CPU-years. Moreover, the validity of TDDFT as the correct <italic>ab initio</italic> method is questionable. One well-known issue is, for instance, the uneven treatment of excitations of different nature or spin multiplicity (<xref ref-type="bibr" rid="B29">Parac and Grimme, 2003</xref>). Even valence <italic>&#x3c0;&#x3c0;</italic>&#x2a; excitations of polycyclic compounds can pose substantial challenges (<xref ref-type="bibr" rid="B13">Grimme and Parac, 2003</xref>; <xref ref-type="bibr" rid="B32">Prlj et&#x20;al., 2016</xref>). In order to combine computational efficiency with accurate prediction of spectral properties, we used semiempirical methods of quantum chemistry. Despite them not being <italic>ab initio</italic> approaches, many semiempirical methods, including &#x201c;spectral&#x201d; modifications, were initially parametrized on small aromatic and other flat conjugated organic molecules. The accuracy of semiempirical methods for the prediction of the lowest excitation energies is expected to be on par with TDDFT, while greatly speeding up calculations. One can compare different approaches and estimate their typical errors by validating computational approaches against experimental data. Reference data for a small validation dataset can also be obtained with high-level <italic>ab initio</italic> methods. For molecules of moderate size, such as the TTF cores we consider in this work, even multiconfiguration calculations can be theoretically feasible. We have attempted to apply the complete active space self-consistent field (CASSCF) method supplemented with multiconfiguration quasi-degenerate second-order perturbation theory (MCQDPT). 
The maximum reasonable size of active space was (12, 12), which was found to be sufficient for triplet excitation to converge in almost all cases. Unfortunately, first singlet excitations converged much more slowly. Even after some admixture of the ground state, convergence was not achieved. It appears that the CASSCF/MCQDPT approach cannot be used for blind screening without manual inspection of every particular case, so we limited the validation to experimental references.</p>
<p>The first step in the calculation of excitation energies is the optimization of molecular geometry. For this purpose, we have used the SE method PM3 as implemented in the Gaussian 16 software package (<xref ref-type="bibr" rid="B7">Frisch et&#x20;al., 2016</xref>). We used the configuration interaction singles (CIS) approach to compute excitation energies. We have tested three semiempirical methods: AM1 (<xref ref-type="bibr" rid="B4">Dewar et&#x20;al., 1985</xref>), PM3 (<xref ref-type="bibr" rid="B42">Stewart, 1989</xref>), and ZINDO/S (<xref ref-type="bibr" rid="B36">Ridley and Zerner, 1973</xref>) as implemented in the Gaussian 16 software package. For comparison, we also calculated excitation energies using DFT with the settings optimized for mixed-valence organic compounds (<xref ref-type="bibr" rid="B35">Renz et&#x20;al., 2009</xref>): BLYP35/def2-TZVP (<xref ref-type="bibr" rid="B45">Weigend and Ahlrichs, 2005</xref>) geometry optimization followed by TDDFT for <italic>S</italic>
<sub>1</sub> and <italic>T</italic>
<sub>2</sub> states, or by &#x394;SCF for <italic>T</italic>
<sub>1</sub> state using the M06-2X exchange-correlation functional (<xref ref-type="bibr" rid="B47">Zhao and Truhlar, 2008</xref>) and the same basis set. It is important to note here that our final goal is to develop a method to predict transition energies within a series of polycyclic organic molecules, so we are not interested in the absolute accuracy of the method but rather in its high precision. Bias can be corrected with a linear transformation applied after the calculation. The performances of different methods are compared in <xref ref-type="table" rid="T1">Table&#x20;1</xref>. From the perspective of these results, we can suggest the SE method PM3 as the optimal choice for all further calculations on large molecular datasets. After proper linear correction, it outperforms other SE methods for triplets and is almost on par with ZINDO/S for singlets. PM3 is also on par with corrected DFT and significantly improves over DFT without correction. Plots of calculated versus experimental transition energies for PM3 and DFT can be found in <xref ref-type="sec" rid="s11">Supplementary Figure&#x20;S3</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Root mean squared error (RMSE, in eV) between the predicted and experimental excited state energies. Values for singlet and triplet states are presented separately.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="center">State</th>
<th align="center">RMSE</th>
<th align="center">RMSE, corrected</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="left">PM3</td>
<td align="center">S<sub>1</sub>
</td>
<td align="char" char=".">0.286</td>
<td align="center">0.184</td>
</tr>
<tr>
<td align="center">T<sub>1</sub>
</td>
<td align="char" char=".">0.720</td>
<td align="center">0.279</td>
</tr>
<tr>
<td rowspan="2" align="left">AM1</td>
<td align="center">S<sub>1</sub>
</td>
<td align="char" char=".">0.257</td>
<td align="center">0.233</td>
</tr>
<tr>
<td align="center">T<sub>1</sub>
</td>
<td align="char" char=".">0.751</td>
<td align="center">0.342</td>
</tr>
<tr>
<td rowspan="2" align="left">ZINDO/S</td>
<td align="center">S<sub>1</sub>
</td>
<td align="char" char=".">0.276</td>
<td align="center">0.267</td>
</tr>
<tr>
<td align="center">T<sub>1</sub>
</td>
<td align="char" char=".">0.835</td>
<td align="center">0.381</td>
</tr>
<tr>
<td rowspan="2" align="left">DFT</td>
<td align="center">S<sub>1</sub>
</td>
<td align="char" char=".">0.549</td>
<td align="center">0.173</td>
</tr>
<tr>
<td align="center">T<sub>1</sub>
</td>
<td align="char" char=".">0.858</td>
<td align="center">0.242</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-5">
<title>2.5 Machine Learning-Assisted Design</title>
<p>Although exhaustive enumeration of chemical compounds is possible for certain restricted areas of the chemical space, it is always desirable to &#x201c;soften&#x201d; the constraints and search for promising compounds within less restricted regions. Moreover, it is often not easy to formulate clear and complete rules on the chemical diversity of all possible candidate compounds and implement the corresponding deterministic algorithms for library generation. In such cases, approaches based on machine learning and, in particular, deep neural networks can be of great help. The general idea of ML-assisted design proceeds as follows: first, we construct computationally (or extract from experiments) a database with a certain set of molecules that we assume to be relevant for the considered problem. Then, we train a generative machine learning model (usually a deep neural network) on that database, in the hope that the model will capture fundamental structural and chemical features of the dataset and will be capable of suggesting new molecules beyond the training set. If the architecture of the generative model and learning procedures are organized well, one can expect that a large fraction of generated molecules will be relevant for the problem of interest, thus greatly reducing the search space for subsequent validation. In the particular case of TTF compounds, we expect that the model will generate chemical structures featuring excitation energies applicable for the TTF process (see <xref ref-type="disp-formula" rid="e2">formula (2)</xref> and discussion below for details).</p>
<p>To test the applicability of generative models for computational discovery of TTF materials, we have utilized and compared several deep generative models, including character-level recurrent neural networks, adversarial autoencoders, and variational autoencoders (see <xref ref-type="sec" rid="s3-2">Section 3.2</xref>). The best model that we recommend for practical use is the junction tree variational autoencoder (JT-VAE) architecture introduced by <xref ref-type="bibr" rid="B20">Jin et&#x20;al. (2018)</xref>. The decoder of JT-VAE consists of two parts: a graph convolutional neural network (CNN) and a junction tree convolutional neural network. The choice of the architecture was motivated by JT-VAE&#x2019;s superior ability to encode and decode cyclic fragments of molecules. The latter is often challenging for conventional molecular graph CNNs but is of primary importance for TTF molecules that feature distributed <italic>&#x3c0;</italic>-electronic orbitals.</p>
</sec>
<sec id="s2-6">
<title>2.6 Prediction of Excitation Energies</title>
<p>Apart from the task of molecular generation, neural networks can also be used to predict excitation energy levels of the molecule. We have trained the neural network to predict energies of the singlet and first two triplet states of TTF molecules. Our neural network for energy prediction is also based on junction tree convolutions; the high-level architecture of this neural network, which we call JT-E (Junction Tree Energies), is presented in <xref ref-type="fig" rid="F3">Figure&#x20;3</xref>. JT-E is constructed as follows: the layer of Junction-Tree encoder preceding the latent space is connected to a network with several fully connected layers of decreasing dimensionality. The last layer of that network has three heads that correspond to <italic>S</italic>
<sub>1</sub>, <italic>T</italic>
<sub>1</sub>, and <italic>T</italic>
<sub>2</sub> excitation energies. The neural network is trained to minimize the sum of root mean square errors between predicted and calculated values of excitation energies. The JT-E model has allowed us to predict with good accuracy if a molecule might be suitable for TTF knowing only its SMILES notation (see <xref ref-type="sec" rid="s11">Supplementary Table S1</xref> for numerical results on existing benchmarks).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Architecture of the JT-E network for excitation energies prediction. Latent vectors of the JT-VAE encoder corresponding to the junction tree and molecular graph are connected to several fully connected layers. The last layer has three heads, corresponding to <italic>S</italic>
<sub>1</sub>, <italic>T</italic>
<sub>1</sub>, and <italic>T</italic>
<sub>2</sub> excitation energies.</p>
</caption>
<graphic xlink:href="fchem-09-800133-g003.tif"/>
</fig>
</sec>
</sec>
<sec id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1&#x20;Brute-Force Screening of the Core Compounds</title>
<p>The structure generation algorithm shown in <xref ref-type="fig" rid="F2">Figure&#x20;2</xref> has provided us with 472, 505 non-equivalent compounds with at most 4 rings, which constitutes an exhaustive sampling of the chemical space defined by the constraints listed in <xref ref-type="sec" rid="s2-2">Section 2.2</xref>. Applying <xref ref-type="disp-formula" rid="e2">formula (2)</xref>, we have selected 5,690 candidates from the set of 472, 505 compounds that are most promising for deep blue TTF applications. These candidates should be subjected to more detailed analysis. An important additional target here is potential synthesizability. The most robust way to ensure synthesizability is to search for already known compounds. Among 5,690 candidates, we found 107 compounds contained in the PubChem (<xref ref-type="bibr" rid="B22">Kim et&#x20;al., 2021</xref>) database. This estimate gives hope that the exploration of considered regions of chemical space can provide a sufficient amount of TTF candidates, both core structures and their derivatives.</p>
<p>Among 107 PubChem hits, several distinct groups of compounds can be identified. The first group of 16 compounds consists of anthracene and its nitrogen-containing structural and isoelectronic analogues. Then, one can find 19 analogues of tetracene, 11 analogues of isobenzofurane, as well as pyrene and two of its analogues. A large and diverse group of 28 compounds contains a furane moiety as part of the system of fused rings. The remaining 30 compounds are not so closely related to existing TTF materials. The major part of molecules from the set with 107 elements are analogues of molecules with registered TTF activity (<xref ref-type="bibr" rid="B44">Wang et&#x20;al., 2020</xref>). Introduction of additional nitrogens does not change the electronic configuration, but modulates nuclear charges of chosen atomic sites, which sometimes can make <italic>S</italic>
<sub>1</sub> levels higher (note the cases of tetracene and isobenzofurane), so the conditions for blue TTF materials are satisfied. Review of the core compounds produced by the screening procedure supports the conclusion about the adequacy of applied methodology and underlying SE approach. This success can be partially explained by the loose criteria used in the screening: more than 1% of the original dataset has passed the filters. This is in line with the general strategy of filtering out definitely bad compounds and allowing all that have a chance to prove useful. We believe that the list of PubChem hits contains some indications useful in the search for novel cores with TTF activity. In the subsequent sections, we apply the same computational procedure to prepare training datasets for targeted design of TTF materials based on substituted compounds. The list of PubChem hits, as well as specific details of the screening procedure, can be found in the Supplementary Material (see Note S2.2 and <xref ref-type="sec" rid="s11">Supplementary Table&#x20;S2</xref>).</p>
</sec>
<sec id="s3-2">
<title>3.2 Baselines and Performance of Generative Models</title>
<p>We have investigated the performance of various generative models on a dataset of cores defined in the previous subsection. Well-defined chemical composition of the subspace allows us to measure consistently if generative models are capable of suggesting molecules predominantly from the chemical subspace of interest and whether it is possible to tune generators to suggest novel molecules from the subspace with energies satisfying TTF criteria. We have excluded molecules with low and negative excitation energies from the dataset using the following criteria:<disp-formula id="e3">
<mml:math id="m3">
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>1.0</mml:mn>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mtext>eV</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mspace width="2em"/>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>0.5</mml:mn>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mtext>eV</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mspace width="2em"/>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>1.0</mml:mn>
<mml:mspace width="0.3333em" class="nbsp"/>
<mml:mtext>eV</mml:mtext>
<mml:mo>.</mml:mo>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>Negative excitation energies are nonphysical and correspond to situations where methods of quantum chemistry fail for low lying excitations. Since we focus on the discovery of TTF molecules suitable for blue OLED applications, we are not interested in those molecules because they will only introduce additional noise to the models. The total number of molecules in the truncated dataset is 341,&#x20;433.</p>
<p>Distributions of excitation energies in the dataset are shown in <xref ref-type="fig" rid="F4">Figure&#x20;4</xref>. The figure shows that all energies in the dataset feature unimodal distributions with the following means and standard deviations: <italic>S</italic>
<sub>1</sub> &#x3d; 1.98&#x20;&#xb1; 0.66&#xa0;eV, <italic>T</italic>
<sub>1</sub> &#x3d; 1.52&#x20;&#xb1; 0.51&#xa0;eV, and <italic>T</italic>
<sub>2</sub> &#x3d; 2.19&#x20;&#xb1; 0.47&#xa0;eV. The dataset was split randomly into two parts of the same size that were used as training and validation sets (see SI for details).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Distributions of <italic>S</italic>
<sub>1</sub>, <italic>T</italic>
<sub>1</sub>, and <italic>T</italic>
<sub>2</sub> excitation energies in the dataset of generated molecular structures.</p>
</caption>
<graphic xlink:href="fchem-09-800133-g004.tif"/>
</fig>
<p>We have investigated the performance of our implementation of the JT-VAE model (modified from <ext-link ext-link-type="uri" xlink:href="https://github.com/wengong-jin/icml18-jtnn">https://github.com/wengong-jin/icml18-jtnn</ext-link>) and three well-known baseline models, namely:</p>
<p>&#x2022; character-level recurrent neural network (CharRNN) (<xref ref-type="bibr" rid="B31">Preuer et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B39">Segler et&#x20;al., 2018</xref>) that models the distribution of the next token in a sequence (SMILES string) with a recurrent neural network; &#x2022; variational autoencoder (VAE) (<xref ref-type="bibr" rid="B23">Kingma and Welling, 2014</xref>; <xref ref-type="bibr" rid="B21">Kadurin et&#x20;al., 2017</xref>; <xref ref-type="bibr" rid="B9">G&#xf3;mez-Bombarelli et&#x20;al., 2018</xref>) that consists of two networks, an encoder and a decoder, which learn a mapping of the input into a low-dimensional latent space by minimizing the reconstruction loss and regularization in the form of the Kullback&#x2013;Leibler divergence between the approximation and the posterior distribution; &#x2022; adversarial autoencoder (AAE) (<xref ref-type="bibr" rid="B26">Makhzani et&#x20;al., 2016</xref>) that replaces the Kullback&#x2013;Leibler divergence from VAE with an adversarial objective, training a discriminator network to distinguish samples from the latent space and a prior distribution that the model will sample from to generate new instances.</p>
<p>In all generative models, we use SMILES strings as the input and output representations. We have used the implementations of CharRNN, VAE, and AAE models available at <ext-link ext-link-type="uri" xlink:href="https://github.com/molecularsets/moses">https://github.com/molecularsets/moses</ext-link>, the benchmarking platform called MOSES (<xref ref-type="bibr" rid="B30">Polykovskiy et&#x20;al., 2020</xref>). All models were trained on the training dataset using hyperparameters and protocols as suggested by <xref ref-type="bibr" rid="B30">Polykovskiy et&#x20;al. (2020)</xref>.</p>
<p>We have implemented two different regimes for sampling from the latent space of autoencoder models (VAE, AAE, and JT-VAE): random and seeded. The random regime corresponds to &#x201c;conventional&#x201d; sampling of the latent space from the normal distribution <inline-formula id="inf1">
<mml:math id="m4">
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> that was used as prior in our models. Seeded sampling was carried out as follows. After training the encoder on the training dataset, a subset of molecules most promising for deep blue applications was selected using the criteria from <xref ref-type="disp-formula" rid="e2">formula (2)</xref>: <italic>&#x3b4;</italic>
<sub>
<italic>a</italic>
</sub> &#x3e; 2.7 eV, <italic>&#x3b4;</italic>
<sub>
<italic>b</italic>
</sub> &#x3d; <italic>&#x3b4;</italic>
<sub>
<italic>c</italic>
</sub> &#x3e; &#x2212;0.1&#xa0;eV. Only 58 out of 171, 716 molecules in the training set satisfy these criteria; we will further refer to molecules satisfying them as <italic>leads</italic>. Latent representation vectors <italic>&#x3bd;</italic>
<sub>
<italic>i</italic>
</sub> were calculated for each of the leads. Then, three lead vectors <italic>&#x3bd;</italic>
<sub>1</sub>, <italic>&#x3bd;</italic>
<sub>2</sub>, and <italic>&#x3bd;</italic>
<sub>3</sub> were selected randomly and multiplied by random positive factors <italic>&#x3b1;</italic>
<sub>1</sub>, <italic>&#x3b1;</italic>
<sub>2</sub>, and <italic>&#x3b1;</italic>
<sub>3</sub> that satisfy the following relation: <inline-formula id="inf2">
<mml:math id="m5">
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula>. The sample vector <italic>&#x3bd;</italic> in the latent space was constructed as <inline-formula id="inf3">
<mml:math id="m6">
<mml:mi>&#x3bd;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bd;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula>. The molecular structure was obtained by applying the model&#x2019;s decoder to the resulting latent vector <italic>&#x3bd;</italic>. This approach has allowed us to sample the latent space not randomly but mostly in the vicinity of known leads. This should be beneficial if the latent space clusters favorably, separating promising TTF molecules from the rest (see discussion in <xref ref-type="sec" rid="s3-3">Section 3.3</xref>). We have modified the implementations of VAE and AAE models by <xref ref-type="bibr" rid="B30">Polykovskiy et&#x20;al. (2020)</xref> to run sampling in seeded mode. Note that since <italic>CharRNN</italic> is not an autoencoder model, it cannot be &#x201c;seeded&#x201d; with leads. Therefore, we do not present any results for <italic>CharRNN</italic> in the seeded&#x20;mode.</p>
<p>Results for random and seeded sampling of the latent space for different models are presented in <xref ref-type="table" rid="T2">Table&#x20;2</xref>; we have obtained 10, 000 samples from each model. <xref ref-type="table" rid="T2">Table&#x20;2</xref> clearly shows that all models were capable of suggesting valid SMILES, as checked with RDKit (<xref ref-type="bibr" rid="B25">Landrum, 2012</xref>).</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Performance of deep generative models for 10, 000 random samples: number of valid molecules, number of unique molecules, number of molecules from the training set, number of molecules from the validation set, number of leads [defined as <italic>&#x3b4;</italic>
<sub>
<italic>a</italic>
</sub> &#x3e; 2.7&#xa0;eV, <italic>&#x3b4;</italic>
<sub>
<italic>b</italic>
</sub> &#x3d; <italic>&#x3b4;</italic>
<sub>
<italic>c</italic>
</sub> &#x3e; &#x2212;0.1&#xa0;eV in formula (2)] from the train and validation sets and their percentage among the corresponding generated samples.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="center">Sampling</th>
<th align="center">Valid</th>
<th align="center">Unique</th>
<th align="center">From train</th>
<th align="center">From valid</th>
<th align="center">Train Leads</th>
<th align="center">% Train Leads</th>
<th align="center">Valid Leads</th>
<th align="center">% Valid Leads</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">CharRNN</td>
<td align="left">random</td>
<td align="center">9,962</td>
<td align="char" char=".">
<bold>9,760</bold>
</td>
<td align="char" char=".">
<bold>7,487</bold>
</td>
<td align="char" char=".">1886</td>
<td align="char" char=".">2</td>
<td align="char" char=".">0.026</td>
<td align="char" char=".">1</td>
<td align="char" char=".">0.053</td>
</tr>
<tr>
<td align="left">VAE</td>
<td align="left">random</td>
<td align="center">6,312</td>
<td align="char" char=".">6,246</td>
<td align="char" char=".">1999</td>
<td align="char" char=".">1,394</td>
<td align="char" char=".">1</td>
<td align="char" char=".">0.050</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0.000</td>
</tr>
<tr>
<td align="left">AAE</td>
<td align="left">random</td>
<td align="center">7,582</td>
<td align="char" char=".">7,400</td>
<td align="char" char=".">2,371</td>
<td align="char" char=".">1922</td>
<td align="char" char=".">1</td>
<td align="char" char=".">0.052</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0.000</td>
</tr>
<tr>
<td align="left">JT-VAE</td>
<td align="left">random</td>
<td align="center">
<bold>10,000</bold>
</td>
<td align="char" char=".">9,186</td>
<td align="char" char=".">1,026</td>
<td align="char" char=".">1,036</td>
<td align="char" char=".">1</td>
<td align="char" char=".">0.097</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0.000</td>
</tr>
<tr>
<td align="left">VAE</td>
<td align="left">seeded</td>
<td align="center">5,918</td>
<td align="char" char=".">5,781</td>
<td align="char" char=".">1,617</td>
<td align="char" char=".">1,198</td>
<td align="char" char=".">7</td>
<td align="char" char=".">0.433</td>
<td align="char" char=".">1</td>
<td align="char" char=".">0.083</td>
</tr>
<tr>
<td align="left">AAE</td>
<td align="left">seeded</td>
<td align="center">7,974</td>
<td align="char" char=".">7,376</td>
<td align="char" char=".">2,643</td>
<td align="char" char=".">
<bold>2,155</bold>
</td>
<td align="char" char=".">3</td>
<td align="char" char=".">0.114</td>
<td align="char" char=".">4</td>
<td align="char" char=".">0.186</td>
</tr>
<tr>
<td align="left">JT-VAE</td>
<td align="left">seeded</td>
<td align="center">
<bold>10,000</bold>
</td>
<td align="char" char=".">3,472</td>
<td align="char" char=".">559</td>
<td align="char" char=".">558</td>
<td align="char" char=".">
<bold>16</bold>
</td>
<td align="char" char=".">
<bold>2.862</bold>
</td>
<td align="char" char=".">
<bold>11</bold>
</td>
<td align="char" char=".">
<bold>1.971</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Largest entries in each column are presented with bold font.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>One of the most important properties of a generative model is its ability to sample from novel yet not arbitrary regions of chemical space. Since in the present work we are interested in regions of chemical space consisting solely of <italic>&#x3c0;</italic>-conjugated systems with 5- or 6-membered rings, we measure the extrapolating ability of generative models by the number of suggested molecules from the validation set. Note that not all generated molecules necessarily satisfy the constraints applied during dataset creation, since no formal restrictions on molecular composition were implemented for the generators. Some generated molecules do not belong to the above-mentioned chemical space of <italic>&#x3c0;</italic>-conjugated molecules. Therefore, the number of unique molecules is larger than the sum of molecules from the training and validation sets. <xref ref-type="table" rid="T2">Table&#x20;2</xref> shows that the AAE model was able to suggest the largest fraction of novel molecules (1992 and 2,155 for random and seeded implementations respectively). Almost 15% of the molecules were from the validation set, i.e.,&#x20;novel. Note also that the size and chemical composition of the training and validation sets are identical, so an unbiased generator should suggest a similar number of molecules from both sets. Autoencoder models indeed demonstrate nearly equal numbers of generated molecules from both datasets. On the contrary, the <italic>CharRNN</italic> model is extremely biased towards the training set, which could be an indication of overfitting; here we do not investigate that question in depth since we used default suggested values of training parameters from MOSES (<xref ref-type="bibr" rid="B30">Polykovskiy et&#x20;al., 2020</xref>).</p>
<p>The most interesting and important part for the problem of the discovery of novel TTF molecules is the number of novel discovered leads. Recall that a lead is a molecule with excitation energies suitable for blue OLED applications. We see that all models performed poorly in the conventional random sampling mode: <italic>CharRNN</italic> suggested one lead from the validation set, while all other models suggested none. The situation is very different for seeded generation. The AAE model was capable of suggesting four leads from the validation set that have not been seen by the model during training. And this is exactly where the JT-VAE model shines: it was able to generate 11 TTF candidates from the validation set, far more than any other model in the comparison.</p>
<p>Note also that the number of unique molecules generated by AAE and VAE is nearly identical to the total number of valid SMILES both in random and seeded implementations. For JT-VAE, this holds under random sampling, but not in the seeded mode, where nearly 65% of generated molecules turned out to be duplicates. At first glance it might seem to be a drawback, but in fact this property means that fewer molecules need to be checked for excitation energies favorable for TTF, and it shows that the sampling space of JT-VAE in seeded mode is much more concentrated. Ultimately, we are interested in the number of suggested leads, not just the number of unique molecules, and indeed, despite lower number of unique SMILES, the JT-VAE model suggested by far the most leads from the training and validation sets. This advantage is especially striking if we consider the fraction of generated molecules that need to be checked to find a new lead (shown as percentages in <xref ref-type="table" rid="T2">Table&#x20;2</xref>): the probability to find a lead with JT-VAE is <inline-formula id="inf4">
<mml:math id="m7">
<mml:mo>&#x2248;</mml:mo>
<mml:mn>2.4</mml:mn>
</mml:math>
</inline-formula>% for each new suggested SMILES string, which represents a more than 15&#x20;times higher rate than for the VAE and AAE models and 75&#x20;times higher than picking molecules from the validation set at random.</p>
<p>Note that for all autoencoder models we have observed a presumably linear dependence between the number of unique molecules and the number of molecules from the training and validation sets, as indicated by comparing the random and seeded generators. This is indicative of the fact that seeded generation does not alter the fraction of generated molecules belonging to the desired region of the chemical space, in our case <italic>&#x3c0;</italic>-conjugated systems with 4 rings. This observation suggests that application of seeded sampling does not disturb the predefined constraints on molecular composition, but allows us to further accelerate lead discovery.</p>
<p>Based on the above analysis, we suggest the JT-VAE model with seeded sampling as the best generative model for the discovery of realistic candidates for deep blue OLED applications.</p>
</sec>
<sec id="s3-3">
<title>3.3 Structure of the Latent Space</title>
<p>To better understand why and how the JT-VAE model generates an increased number of leads with seeded sampling, we have investigated the latent space of the model using the t-distributed Stochastic Neighbor Embedding (t-SNE) to generate a two-dimensional visual representation (<xref ref-type="bibr" rid="B43">van der Maaten and Hinton, 2008</xref>). The results obtained for 25, 000 molecules randomly sampled from the dataset are shown in <xref ref-type="fig" rid="F5">Figure&#x20;5</xref>. Color corresponds to the &#x201c;fitness&#x201d; of a molecule for TTF applications: red indicates a better fit, blue, a worse fit. This means that leads are shown in&#x20;red.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Structure of the JT-VAE latent space model obtained by dimensionality reduction via t-SNE (<xref ref-type="bibr" rid="B43">van der Maaten and Hinton, 2008</xref>). Color corresponds to the &#x201c;fitness&#x201d; of a molecule for TTF applications: red denotes a better fit, blue, a worse&#x20;fit.</p>
</caption>
<graphic xlink:href="fchem-09-800133-g005.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="F5">Figure&#x20;5</xref> shows that the distribution of red, blue, and intermediate points is far from uniform in the latent space: leads and generally molecules with higher fitness tend to cluster together. Therefore, if we choose a random linear combination of latent space vectors for three red points (leads), it will have an increased chance to end up in the close vicinity of another red point (especially when all leads are sampled from the same cluster). This demonstrates that the JT-VAE model in seeded sampling mode allows us to discover most of the leads in the chemical space with fewer iterations than other approaches. We note that there are in total only 56 leads in the validation area of the chemical space comprising 170, 716 molecules, and JT-VAE was capable of discovering 11, i.e., <inline-formula id="inf5">
<mml:math id="m8">
<mml:mo>&#x2248;</mml:mo>
<mml:mn>20</mml:mn>
</mml:math>
</inline-formula>% of those leads in just the first 3,500 unique samples (out of 10, 000 total first samples).</p>
<p>As discussed above, we have analyzed the models&#x2019; performance in a relatively small region of chemical space with up to 4 rings without side chains, a region with less than 0.5&#xa0;million molecules in total. This has allowed us to assess the potential of the models to extrapolate beyond the training set and discover leads in the entire constrained chemical space. In what follows, we apply our conclusions to the discovery of TTF candidates in much larger chemical spaces that cannot be sampled exhaustively.</p>
</sec>
<sec id="s3-4">
<title>3.4 Filtering Based on Predicting Excitation Energies</title>
<p>
<xref ref-type="table" rid="T2">Table&#x20;2</xref> shows that in our restricted subset of the chemical space, the seeded JT-VAE model generates leads at a rate of (11 &#x2b; 16)/(559 &#x2b; 558) &#x2248; 2.4%. Though one could apply quantum chemistry methods to all generated molecules to discover the leads, there is a more computationally efficient alternative. We have trained the JT-E network as discussed in <xref ref-type="sec" rid="s2-6">Section 2.6</xref> to predict excitation energies for molecules supplied as SMILES strings from the generator. Calculations of excitation energies for the training dataset were done using PM3 (<xref ref-type="bibr" rid="B42">Stewart, 1989</xref>), the same SE method as we have used above. Mean absolute errors for the excitation energies for the validation set are 0.104, 0.054, and 0.086&#xa0;eV for the <italic>S</italic>
<sub>1</sub>, <italic>T</italic>
<sub>1</sub>, and <italic>T</italic>
<sub>2</sub> energies respectively. Note that this is a remarkable accuracy, comparable to the accuracy of the PM3 method itself. The JT-E model is trained independently on the same dataset as the JT-VAE model (see Supplementary Material for details). Based on predicted energies, we have filtered generated molecules according to the same criteria from <xref ref-type="disp-formula" rid="e2">(Eq. 2)</xref>: <italic>&#x3b4;</italic>
<sub>
<italic>a</italic>
</sub> &#x3e; 2.3&#xa0;eV, <italic>&#x3b4;</italic>
<sub>
<italic>b</italic>
</sub> &#x3d; <italic>&#x3b4;</italic>
<sub>
<italic>c</italic>
</sub> &#x3e; &#x2212;0.4&#xa0;eV. Those are looser criteria than for lead selection since we wanted to give a very safe margin of error for the JT-E model (exceeding 2<italic>&#x3c3;</italic>). Geometries of the molecules satisfying these criteria are then optimized, and excitation energies are computed using PM3. In the next section, we show and discuss the overall workflow for TTF molecules discovery.</p>
</sec>
<sec id="s3-5">
<title>3.5 Workflow for TTF Molecules Discovery</title>
<p>In order to promote the discovery of real TTF materials, we assembled a multi-step workflow acting in the space of <italic>&#x3c0;</italic>-conjugated compounds. The training datasets consisted of previously used core structures decorated with side groups. We used two types of side groups: tert-butyl and mesityl moieties, as a model of different bulky, but chemically inert substituents that are often used to prevent flat cores from aggregation. We also replaced all amine hydrogens with phenyls, as required to ensure operational stability of the material. The size of the chemical space for compounds with side groups is much larger than for cores only. Therefore the datasets were not exhaustive and included <inline-formula id="inf6">
<mml:math id="m9">
<mml:mo>&#x2248;</mml:mo>
<mml:mn>450,000</mml:mn>
</mml:math>
</inline-formula> molecules with positions and types of the side groups selected at random.</p>
<p>We have investigated three different chemical spaces corresponding to cores decorated with zero, one, and two side chains. We had tried to train a single network on the entire dataset, but our experiments showed that training separate neural networks for each number of side chains allows us to increase reconstruction accuracy and the accuracy of energy prediction, as well as to achieve better clustering of leads of each type in the chemical space, so we have chosen this strategy. For all molecules from generated chemical spaces, we optimized their geometries and calculated <italic>S</italic>
<sub>1</sub>, <italic>T</italic>
<sub>1</sub>, and <italic>T</italic>
<sub>2</sub> excitation energies using the SE PM3 approach. We have excluded molecules with unreasonably low energies from each dataset, using criteria outlined in <xref ref-type="disp-formula" rid="e3">(Eq. 3)</xref>. The datasets were used to train three JT-VAE generative models and three JT-E energy predicting models (see Supplementary Material for additional data on the architecture and accuracy of JT-VAE and JT-E models). We have selected molecules most suitable for TTF leads from the datasets and utilized them in seeded sampling of the latent space of the autoencoders, using procedures discussed in <xref ref-type="sec" rid="s3-2">Section 3.2</xref>. The overall discovery workflow for TTF materials is shown in <xref ref-type="fig" rid="F6">Figure&#x20;6</xref>.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Workflow of the TTF materials discovery. See text for details.</p>
</caption>
<graphic xlink:href="fchem-09-800133-g006.tif"/>
</fig>
<p>We have found 75, 58, and 55 leads directly in the dataset with 0, 1, and 2 side chains respectively. The leads were used to seed the generator of JT-VAEs. During each discovery loop, we generated another 100, 000 samples and predicted excitation energies for them using the JT-E network. The molecules satisfying criteria discussed in <xref ref-type="sec" rid="s3-4">Section 3.4</xref> were selected for quantum chemical calculation with PM3. Results obtained for each iteration of the workflow cycle are summarized in <xref ref-type="table" rid="T3">Table&#x20;3</xref>.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Gradual increase of the discovered leads for structures with 0, 1, and 2 side chains.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Iteration</th>
<th colspan="3" align="center">Unique after the JT-E filter, by number of side chains</th>
<th colspan="3" align="center">Unique after the PM3 filter, by number of side chains</th>
<th align="center">Total discovered</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left"/>
<td align="char" char=".">0</td>
<td align="char" char=".">1</td>
<td align="char" char=".">2</td>
<td align="char" char=".">0</td>
<td align="char" char=".">1</td>
<td align="char" char=".">2</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="left">0</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="char" char=".">75</td>
<td align="char" char=".">58</td>
<td align="char" char=".">55</td>
<td align="char" char=".">188</td>
</tr>
<tr>
<td align="left">1</td>
<td align="char" char=".">357</td>
<td align="char" char=".">1,248</td>
<td align="char" char=".">1,446</td>
<td align="char" char=".">55</td>
<td align="char" char=".">195</td>
<td align="char" char=".">236</td>
<td align="char" char=".">674</td>
</tr>
<tr>
<td align="left">2</td>
<td align="char" char=".">342</td>
<td align="char" char=".">1,378</td>
<td align="char" char=".">1,582</td>
<td align="char" char=".">67</td>
<td align="char" char=".">420</td>
<td align="char" char=".">402</td>
<td align="char" char=".">1,070</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="table" rid="T3">Table&#x20;3</xref> shows that all three datasets with nearly 1.5&#xa0;million molecules initially contained only 188 leads. However, each iteration of the discovery workflow brings <inline-formula id="inf7">
<mml:math id="m10">
<mml:mo>&#x2248;</mml:mo>
<mml:mn>400</mml:mn>
</mml:math>
</inline-formula> more new leads. After just two iterations, we have obtained more than a thousand compounds that appear promising for deep blue TTF applications. SMILES notation for those molecules along with calculated PM3 energies are provided in Supplementary Material. Although one can easily continue the discovery cycles, we stopped at the current stage since more than 1,000 leads is already a substantial amount that is not easy to verify experimentally.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Discussion</title>
<p>In this work, we have presented a computational approach for the discovery of TTF materials, choosing the best deep generative model on the basis of comprehensive experiments with smaller molecules, extending the results onto a much larger chemical space, and producing hundreds of promising leads for new TTF materials. Let us address several points regarding the applied computational methodology. First, a key element of the present study is the use of a very fast approach to quantum chemistry, based on the PM3 method that was not originally designed for the calculation of excited states. Good accuracy was achieved mainly due to additional empirical scaling of the excitation energies. We do not claim that the found scaling factors can be transferred to other applications or have a general scientific value; their application area is presumably limited to fused heterocyclic chromophores. Second, considering the tradeoff between computational cost and accuracy, our PM3-based method obviously represents one of the cheapest and fastest approaches. This was a necessary requirement to perform calculations for hundreds of thousands of compounds within acceptable computational resources, and these calculations were necessary as large datasets were crucial for the successful training of ML-based models. Accuracy of our calculations and the overall high quality of the approach have been validated in a comparison with reliable reference experimental data, both for the excited states energy levels and registered TTF activity. We believe that the resulting list of PubChem hits can be considered as a standalone contribution to the community, providing candidate compounds for blue OLED materials or, at the very least, promising patterns for further research.</p>
<p>One of the possible drawbacks of the current approach lies in the combinatorial nature of the search for all possible valid molecular structures, regardless of their stability or possible synthesizability. This issue can be resolved if we collect only those cores that can be found in the PubChem database. This solution is simple and robust, although a lot of novel promising compounds are thus disregarded. In our workflow with ML-based models, we do not impose any additional constraints or filters to disregard unrealistic structures. We prefer to train the models on the complete chemical space, so the predictions are expected to be also correct for synthesizable compounds. After producing the leads, we can decide which molecules to pick for experimental trials using expert knowledge and other external considerations.</p>
<p>We have demonstrated that ML methods can be applied for successful generation of novel compounds beyond those in the training set. We have been able to provide ML models with large training datasets obtained using SE methods and, at the same time, use ML inference to cover much larger regions of the chemical space. The number of molecules grows rapidly with increasing size of the molecule and heteroatom population. This means that direct calculation of excitation energies is required only for a very small portion of the target space. After that, the training procedure is performed on this dataset, and energies for any other molecule in the chemical space can be inferred from the model in a batch-wise manner with high computational efficiency.</p>
</sec>
<sec id="s5">
<title>5 Conclusion</title>
<p>Using the workflow described above and shown in <xref ref-type="fig" rid="F6">Figure&#x20;6</xref>, we have been able to discover hundreds of TTF candidate molecules with <italic>S</italic>
<sub>1</sub>, <italic>T</italic>
<sub>1</sub>, and <italic>T</italic>
<sub>2</sub> energy levels suitable for TTF application in blue OLED devices. These candidates include more than a dozen PubChem compounds. After a thorough examination of the suggested leads by experimental chemists, several most promising candidates have been selected for experimental verification. The selection procedures included not only criteria on excitation energies, but also expert assessment of chemical and electrical stability of compounds, their synthetic accessibility, and other considerations. The experimental verification is currently in progress, and we look forward to reporting the results in the near future.</p>
<p>We note that the presented approach is not limited solely to TTF molecules, and with reasonable modifications can be applied to other compounds relevant for organic optoelectronic materials.</p>
</sec>
</body>
<back>
<sec id="s6">
<title>Data Availability Statement</title>
<p>The datasets and code for this study can be provided upon reasonable request to the authors.</p>
</sec>
<sec id="s7">
<title>Author Contributions</title>
<p>AO created the databases of TTF molecules and performed benchmarks of the methods of quantum chemistry. AY supervised the research, developed and applied ML models in collaboration with SN. YJ and HC guided practical applications of the discovery workflow. AY and AO wrote the initial version of the manuscript with contributions from SN. All authors discussed the research, contributed to the final version of the manuscript, and reviewed and approved the paper.</p>
</sec>
<sec id="s8">
<title>Funding</title>
<p>The work of SN was supported by a grant for research centers in the field of artificial intelligence, provided by the Analytical Center for the Government of the Russian Federation in accordance with the subsidy agreement (agreement identifier 000000D730321P5Q0002) and the agreement with the Ivannikov Institute for System Programming of the Russian Academy of Sciences dated November 2, 2021 No. 70-2021-00142.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of Interest</title>
<p>AY, AO, YJ, and HC were employed by Samsung Electronics.</p>
<p>The remaining author declares that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>We would like to thank Stanislav Polonsky for his support and helpful discussions. Computational resources were provided by the supercomputing facilities of Samsung Electronics.</p>
</ack>
<sec id="s11">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fchem.2021.800133/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fchem.2021.800133/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material>
<label>Data Sheet 1</label>
<caption>
<p>101 hits without side groups after 3 iterations.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Data Sheet 2</label>
<caption>
<p>485 hits with 1 side group after 3 iterations.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Data Sheet 3</label>
<caption>
<p>484 hits with 2 side groups after 3 iterations.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Data Sheet 4</label>
<caption>
<p>107 PubChem hits.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Data Sheet 5</label>
<caption>
<p>Supplementary Material.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="DataSheet4.CSV" id="SM1" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet3.CSV" id="SM2" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet5.pdf" id="SM3" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet1.CSV" id="SM4" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet2.CSV" id="SM5" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abreha</surname>
<given-names>B. G.</given-names>
</name>
<name>
<surname>Agarwal</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Foster</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Blaiszik</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Lopez</surname>
<given-names>S. A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Virtual Excited State Reference for the Discovery of Electronic Materials Database: An Open-Access Resource for Ground and Excited State Properties of Organic Molecules</article-title>. <source>J.&#x20;Phys. Chem. Lett.</source> <volume>10</volume>, <fpage>6835</fpage>&#x2013;<lpage>6841</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jpclett.9b02577</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Burgos</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pope</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Swenberg</surname>
<given-names>C. E.</given-names>
</name>
<name>
<surname>Alfano</surname>
<given-names>R. R.</given-names>
</name>
</person-group> (<year>1977</year>). <article-title>Heterofission in Pentacene-Doped Tetracene Single Crystals</article-title>. <source>Phys. Stat. Sol. (B)</source> <volume>83</volume>, <fpage>249</fpage>&#x2013;<lpage>256</lpage>. <pub-id pub-id-type="doi">10.1002/pssb.2220830127</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname>
<given-names>Y. Y.</given-names>
</name>
<name>
<surname>F&#xfc;ckel</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Khoury</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Clady</surname>
<given-names>R. G. C. R.</given-names>
</name>
<name>
<surname>Tayebjee</surname>
<given-names>M. J.&#x20;Y.</given-names>
</name>
<name>
<surname>Ekins-Daukes</surname>
<given-names>N. J.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Kinetic Analysis of Photochemical Upconversion by Triplet&#x2212;Triplet Annihilation: Beyond Any Spin Statistical Limit</article-title>. <source>J.&#x20;Phys. Chem. Lett.</source> <volume>1</volume>, <fpage>1795</fpage>&#x2013;<lpage>1799</lpage>. <pub-id pub-id-type="doi">10.1021/jz100566u</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dewar</surname>
<given-names>M. J.&#x20;S.</given-names>
</name>
<name>
<surname>Zoebisch</surname>
<given-names>E. G.</given-names>
</name>
<name>
<surname>Healy</surname>
<given-names>E. F.</given-names>
</name>
<name>
<surname>Stewart</surname>
<given-names>J.&#x20;J.&#x20;P.</given-names>
</name>
</person-group> (<year>1985</year>). <article-title>Development and Use of Quantum Mechanical Molecular Models. 76. AM1: a New General Purpose Quantum Mechanical Molecular Model</article-title>. <source>J.&#x20;Am. Chem. Soc.</source> <volume>107</volume>, <fpage>3902</fpage>&#x2013;<lpage>3909</lpage>. <pub-id pub-id-type="doi">10.1021/ja00299a024</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>D&#xf6;rr</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Gropper</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>1963</year>). <article-title>Die Polarisation der Triplett-Singulett-Phosphoreszenz einiger Aromaten und Heterocyclen (II. Mitteilung): Chinolin, Isochinolin, Fluoren, Chrysen, Triphenylen, Dibenzochinoxalin, 1,2-3,4-Dibenzophenazin, Coronen</article-title>. <source>Berichte der Bunsengesellschaft f&#xfc;r physikalische Chem.</source> <volume>67</volume>, <fpage>193</fpage>&#x2013;<lpage>201</lpage>. <pub-id pub-id-type="doi">10.1002/bbpc.19630670214</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Evans</surname>
<given-names>D. F.</given-names>
</name>
</person-group> (<year>1957</year>). <article-title>257. Perturbation of Singlet-Triplet Transitions of Aromatic Molecules by Oxygen under Pressure</article-title>. <source>J.&#x20;Chem. Soc.</source>, <fpage>1351</fpage>&#x2013;<lpage>1357</lpage>. <pub-id pub-id-type="doi">10.1039/jr9570001351</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Frisch</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Trucks</surname>
<given-names>G. W.</given-names>
</name>
<name>
<surname>Schlegel</surname>
<given-names>H. B.</given-names>
</name>
<name>
<surname>Scuseria</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Robb</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Cheeseman</surname>
<given-names>J.&#x20;R.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <source>Gaussian 16 Revision C.01</source>. <publisher-loc>Wallingford CT</publisher-loc>: <publisher-name>Gaussian Inc</publisher-name>. </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>G&#xf3;mez-Bombarelli</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Aguilera-Iparraguirre</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hirzel</surname>
<given-names>T. D.</given-names>
</name>
<name>
<surname>Duvenaud</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Maclaurin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Blood-Forsythe</surname>
<given-names>M. A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Design of Efficient Molecular Organic Light-Emitting Diodes by a High-Throughput Virtual Screening and Experimental Approach</article-title>. <source>Nat. Mater.</source> <volume>15</volume>, <fpage>1120</fpage>&#x2013;<lpage>1127</lpage>. <pub-id pub-id-type="doi">10.1038/nmat4717</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>G&#xf3;mez-Bombarelli</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>J.&#x20;N.</given-names>
</name>
<name>
<surname>Duvenaud</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hern&#xe1;ndez-Lobato</surname>
<given-names>J.&#x20;M.</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Lengeling</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Sheberla</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules</article-title>. <source>ACS Cent. Sci.</source> <volume>4</volume>, <fpage>268</fpage>&#x2013;<lpage>276</lpage>. <pub-id pub-id-type="doi">10.1021/acscentsci.7b00572</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goodman</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kasha</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1958</year>). <article-title>The Observation and Assignment of the Lowest Multiplicity-Forbidden Transition in Pyrazine</article-title>. <source>J.&#x20;Mol. Spectrosc.</source> <volume>2</volume>, <fpage>58</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2852(58)90060-2</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goodman</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>1961</year>). <article-title>Transitions in the Azines</article-title>. <source>J.&#x20;Mol. Spectrosc.</source> <volume>6</volume>, <fpage>109</fpage>&#x2013;<lpage>137</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2852(61)90235-1</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gray</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Dzebo</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Abrahamsson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Albinsson</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Moth-Poulsen</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Triplet-triplet Annihilation Photon-Upconversion: towards Solar Energy Applications</article-title>. <source>Phys. Chem. Chem. Phys.</source> <volume>16</volume>, <fpage>10345</fpage>&#x2013;<lpage>10352</lpage>. <pub-id pub-id-type="doi">10.1039/c4cp00744a</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grimme</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Parac</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Substantial Errors from Time-dependent Density Functional Theory for the Calculation of Excited States of Large &#x3c0; Systems</article-title>. <source>ChemPhysChem</source> <volume>4</volume>, <fpage>292</fpage>&#x2013;<lpage>295</lpage>. <pub-id pub-id-type="doi">10.1002/cphc.200390047</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gropper</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Dorr</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>1963</year>). <article-title>Die orientierung der optischen &#xfc;bergangsmomente in phenanthren und seinen azaderivaten</article-title>. <source>Berichte der bunsen-gesellschaft physikalische Chem.</source> <volume>67</volume>, <fpage>193</fpage>&#x2013;<lpage>201</lpage>. <pub-id pub-id-type="doi">10.1002/bbpc.19630670109</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hachmann</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Olivares-Amaya</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Atahan-Evrenk</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Amador-Bedolla</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Carrera</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Gold-Parker</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>The Harvard Clean Energy Project: Large-Scale Computational Screening and Design of Organic Photovoltaics on the World Community Grid</article-title>. <source>J.&#x20;Phys. Chem. Lett.</source> <volume>2</volume>, <fpage>2241</fpage>&#x2013;<lpage>2251</lpage>. <pub-id pub-id-type="doi">10.1021/jz200866s</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Halverson</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hirt</surname>
<given-names>R. C.</given-names>
</name>
</person-group> (<year>1951</year>). <article-title>Near Ultraviolet Solution Spectra of the Diazines</article-title>. <source>J.&#x20;Chem. Phys.</source> <volume>19</volume>, <fpage>711</fpage>&#x2013;<lpage>718</lpage>. <pub-id pub-id-type="doi">10.1063/1.1748338</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Halverson</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hirt</surname>
<given-names>R. C.</given-names>
</name>
</person-group> (<year>1949</year>). <article-title>The Near Ultraviolet Absorption Spectra of the Diazines</article-title>. <source>J.&#x20;Chem. Phys.</source> <volume>17</volume>, <fpage>1165</fpage>&#x2013;<lpage>1166</lpage>. <pub-id pub-id-type="doi">10.1063/1.1747135</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hirt</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Halverson</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Schmitt</surname>
<given-names>R. G.</given-names>
</name>
</person-group> (<year>1954</year>). <article-title>s&#x2010;Triazine. II. The Near Ultraviolet Absorption Spectrum</article-title>. <source>J.&#x20;Chem. Phys.</source> <volume>22</volume>, <fpage>1148</fpage>&#x2013;<lpage>1149</lpage>. <pub-id pub-id-type="doi">10.1063/1.1740306</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ito</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shimada</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kuraishi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Mizushima</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>1957</year>). <article-title>Ultraviolet Absorption of Pyrazine Vapor Due to n-&#x3c0; Transition</article-title>. <source>J.&#x20;Chem. Phys.</source> <volume>26</volume>, <fpage>1508</fpage>&#x2013;<lpage>1515</lpage>. <pub-id pub-id-type="doi">10.1063/1.1743570</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Barzilay</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jaakkola</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Junction Tree Variational Autoencoder for Molecular Graph Generation</article-title>,&#x201d; in <conf-name>International Conference on Machine Learning (PMLR)</conf-name>, <conf-loc>Stockholmsm&#xe4;ssan</conf-loc>, <conf-date>July 10-15, 2018</conf-date>, <fpage>2323</fpage>&#x2013;<lpage>2332</lpage>. </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kadurin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Aliper</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kazennov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mamoshina</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Vanhaelen</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Khrabrov</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>The Cornucopia of Meaningful Leads: Applying Deep Adversarial Autoencoders for New Molecule Development in Oncology</article-title>. <source>Oncotarget</source> <volume>8</volume>, <fpage>10883</fpage>&#x2013;<lpage>10890</lpage>. <pub-id pub-id-type="doi">10.18632/oncotarget.14073</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Gindulyte</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>PubChem in 2021: New Data Content and Improved Web Interfaces</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume>, <fpage>D1388</fpage>&#x2013;<lpage>D1395</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa971</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kingma</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Welling</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Auto-encoding Variational Bayes</article-title>. <source>arXiv</source>, <fpage>1312.6114</fpage>. </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kondakov</surname>
<given-names>D. Y.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Triplet-triplet Annihilation in Highly Efficient Fluorescent Organic Light-Emitting Diodes: Current State and Future Outlook</article-title>. <source>Phil. Trans. R. Soc. A.</source> <volume>373</volume>, <fpage>20140321</fpage>. <pub-id pub-id-type="doi">10.1098/rsta.2014.0321</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Landrum</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2012</year>). <source>RDKit: Open-Source Cheminformatics</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.rdkit.org">https://www.rdkit.org</ext-link>
</comment> </citation>
</ref>
<ref id="B26">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Makhzani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shlens</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jaitly</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Goodfellow</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Adversarial Autoencoders</article-title>,&#x201d; in <conf-name>International Conference on Learning Representations</conf-name>, <conf-loc>Caribe Hilton, San Juan, Puerto Rico</conf-loc>, <conf-date>May 2-4, 2016</conf-date>. </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>O&#x27;Boyle</surname>
<given-names>N. M.</given-names>
</name>
<name>
<surname>Banck</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>James</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Morley</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Vandermeersch</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Hutchison</surname>
<given-names>G. R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Open Babel: An Open Chemical Toolbox</article-title>. <source>J.&#x20;Cheminform</source> <volume>3</volume>, <fpage>33</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1186/1758-2946-3-33</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Padula</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Omar</surname>
<given-names>&#xd6;. H.</given-names>
</name>
<name>
<surname>Nematiaram</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Troisi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Singlet Fission Molecules Among Known Compounds: Finding a Few Needles in a Haystack</article-title>. <source>Energy Environ. Sci.</source> <volume>12</volume>, <fpage>2412</fpage>&#x2013;<lpage>2416</lpage>. <pub-id pub-id-type="doi">10.1039/c9ee01508f</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Parac</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Grimme</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>A TDDFT Study of the Lowest Excitation Energies of Polycyclic Aromatic Hydrocarbons</article-title>. <source>Chem. Phys.</source> <volume>292</volume>, <fpage>11</fpage>&#x2013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1016/s0301-0104(03)00250-7</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Polykovskiy</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhebrak</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sanchez-Lengeling</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Golovanov</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tatanov</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Belyaev</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Molecular Sets (MOSES): a Benchmarking Platform for Molecular Generation Models</article-title>. <source>Front. Pharmacol.</source> <volume>11</volume>, <fpage>565644</fpage>. <pub-id pub-id-type="doi">10.3389/fphar.2020.565644</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Preuer</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Renz</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Unterthiner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Hochreiter</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Klambauer</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Fr&#xe9;chet ChemNet Distance: A Metric for Generative Models for Molecules in Drug Discovery</article-title>. <source>J.&#x20;Chem. Inf. Model.</source> <volume>58</volume>, <fpage>1736</fpage>&#x2013;<lpage>1741</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.8b00234</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Prlj</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sandoval-Salinas</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Casanova</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jacquemin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Corminboeuf</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Low-Lying &#x3c0;&#x3c0;&#x2a; States of Heteroaromatic Molecules: A Challenge for Excited State Methods</article-title>. <source>J.&#x20;Chem. Theor. Comput.</source> <volume>12</volume>, <fpage>2652</fpage>&#x2013;<lpage>2660</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jctc.6b00245</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ravetz</surname>
<given-names>B. D.</given-names>
</name>
<name>
<surname>Pun</surname>
<given-names>A. B.</given-names>
</name>
<name>
<surname>Churchill</surname>
<given-names>E. M.</given-names>
</name>
<name>
<surname>Congreve</surname>
<given-names>D. N.</given-names>
</name>
<name>
<surname>Rovis</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Campos</surname>
<given-names>L. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Photoredox Catalysis Using Infrared Light via Triplet Fusion Upconversion</article-title>. <source>Nature</source> <volume>565</volume>, <fpage>343</fpage>&#x2013;<lpage>346</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-018-0835-2</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reineke</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Baldo</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Room Temperature Triplet State Spectroscopy of Organic Semiconductors</article-title>. <source>Sci. Rep.</source> <volume>4</volume>, <fpage>3797</fpage>. <pub-id pub-id-type="doi">10.1038/srep03797</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Renz</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Theilacker</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lambert</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kaupp</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>A Reliable Quantum-Chemical Protocol for the Characterization of Organic Mixed-Valence Compounds</article-title>. <source>J.&#x20;Am. Chem. Soc.</source> <volume>131</volume>, <fpage>16292</fpage>&#x2013;<lpage>16302</lpage>. <pub-id pub-id-type="doi">10.1021/ja9070859</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ridley</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zerner</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1973</year>). <article-title>An Intermediate Neglect of Differential Overlap Technique for Spectroscopy: Pyrrole and the Azines</article-title>. <source>Theoret. Chim. Acta</source> <volume>32</volume>, <fpage>111</fpage>&#x2013;<lpage>134</lpage>. <pub-id pub-id-type="doi">10.1007/bf00528484</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schiedt</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Weinkauf</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Photodetachment Photoelectron Spectroscopy of Mass Selected Anions: Anthracene and the Anthracene-H<sub>2</sub>O Cluster</article-title>. <source>Chem. Phys. Lett.</source> <volume>266</volume>, <fpage>201</fpage>&#x2013;<lpage>205</lpage>. <pub-id pub-id-type="doi">10.1016/s0009-2614(96)01512-6</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schmidt</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>1977</year>). <article-title>Photoelectron Spectra of Polynuclear Aromatics. V. Correlations with Ultraviolet Absorption Spectra in the Catacondensed Series</article-title>. <source>J.&#x20;Chem. Phys.</source> <volume>66</volume>, <fpage>828</fpage>&#x2013;<lpage>845</lpage>. <pub-id pub-id-type="doi">10.1063/1.433961</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Segler</surname>
<given-names>M. H. S.</given-names>
</name>
<name>
<surname>Kogej</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Tyrchan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Waller</surname>
<given-names>M. P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Generating Focused Molecule Libraries for Drug Discovery with Recurrent Neural Networks</article-title>. <source>ACS Cent. Sci.</source> <volume>4</volume>, <fpage>120</fpage>&#x2013;<lpage>131</lpage>. <pub-id pub-id-type="doi">10.1021/acscentsci.7b00512</pub-id> </citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shimada</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>1961</year>). <article-title>The Lowest Multiplicity-Forbidden Transitions in Diazines-I</article-title>. <source>Spectrochimica Acta</source> <volume>17</volume>, <fpage>14</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1016/0371-1951(61)80007-6</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sponer</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Rush</surname>
<given-names>J.&#x20;H.</given-names>
</name>
</person-group> (<year>1949</year>). <article-title>Near Ultraviolet Absorption Spectra of the Isomeric Picolines</article-title>. <source>J.&#x20;Chem. Phys.</source> <volume>17</volume>, <fpage>587</fpage>&#x2013;<lpage>588</lpage>. <pub-id pub-id-type="doi">10.1063/1.1747337</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stewart</surname>
<given-names>J.&#x20;J.&#x20;P.</given-names>
</name>
</person-group> (<year>1989</year>). <article-title>Optimization of Parameters for Semiempirical Methods II. Applications</article-title>. <source>J.&#x20;Comput. Chem.</source> <volume>10</volume>, <fpage>221</fpage>&#x2013;<lpage>264</lpage>. <pub-id pub-id-type="doi">10.1002/jcc.540100209</pub-id> </citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>van der Maaten</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Visualizing Data Using t-SNE</article-title>. <source>J.&#x20;Mach. Learn. Res.</source> <volume>9</volume>, <fpage>2579</fpage>&#x2013;<lpage>2605</lpage>. <ext-link ext-link-type="uri" xlink:href="http://jmlr.org/papers/v9/vandermaaten08a.html">http://jmlr.org/papers/v9/vandermaaten08a.html</ext-link> </citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tom</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Congreve</surname>
<given-names>D. N.</given-names>
</name>
<name>
<surname>Marom</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>An Energetics Perspective on Why There Are So Few Triplet-Triplet Annihilation Emitters</article-title>. <source>J.&#x20;Mater. Chem. C</source> <volume>8</volume>, <fpage>10816</fpage>&#x2013;<lpage>10824</lpage>. <pub-id pub-id-type="doi">10.1039/d0tc00044b</pub-id> </citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weigend</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ahlrichs</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Balanced Basis Sets of Split Valence, Triple Zeta Valence and Quadruple Zeta Valence Quality for H to Rn: Design and Assessment of Accuracy</article-title>. <source>Phys. Chem. Chem. Phys.</source> <volume>7</volume>, <fpage>3297</fpage>&#x2013;<lpage>3305</lpage>. <pub-id pub-id-type="doi">10.1039/b508541a</pub-id> </citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Ramsundar</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Feinberg</surname>
<given-names>E. N.</given-names>
</name>
<name>
<surname>Gomes</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Geniesse</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Pappu</surname>
<given-names>A. S.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>MoleculeNet: A Benchmark for Molecular Machine Learning</article-title>. <source>Chem. Sci.</source> <volume>9</volume>, <fpage>513</fpage>&#x2013;<lpage>530</lpage>. <pub-id pub-id-type="doi">10.1039/c7sc02664a</pub-id> </citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Truhlar</surname>
<given-names>D. G.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>The M06 Suite of Density Functionals for Main Group Thermochemistry, Thermochemical Kinetics, Noncovalent Interactions, Excited States, and Transition Elements: Two New Functionals and Systematic Testing of Four M06-Class Functionals and 12 Other Functionals</article-title>. <source>Theor. Chem. Account.</source> <volume>120</volume>, <fpage>215</fpage>&#x2013;<lpage>241</lpage>. <pub-id pub-id-type="doi">10.1007/s00214-007-0310-x</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>