<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2023.1173328</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Machine learning enhances prediction of plants as potential sources of antimalarials</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Richard-Bollans</surname>
<given-names>Adam</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2184931"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Aitken</surname>
<given-names>Conal</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Antonelli</surname>
<given-names>Alexandre</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/95500"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bitencourt</surname>
<given-names>C&#xe1;ssia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1188154"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Goyder</surname>
<given-names>David</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lucas</surname>
<given-names>Eve</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/742390"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ondo</surname>
<given-names>Ian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>P&#xe9;rez-Escobar</surname>
<given-names>Oscar A.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/604662"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Pironon</surname>
<given-names>Samuel</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Richardson</surname>
<given-names>James E.</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
<xref ref-type="aff" rid="aff9">
<sup>9</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Russell</surname>
<given-names>David</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Silvestro</surname>
<given-names>Daniele</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff10">
<sup>10</sup>
</xref>
<xref ref-type="aff" rid="aff11">
<sup>11</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/199140"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wright</surname>
<given-names>Colin W.</given-names>
</name>
<xref ref-type="aff" rid="aff12">
<sup>12</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1040065"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Howes</surname>
<given-names>Melanie-Jayne R.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff13">
<sup>13</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1508340"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Royal Botanic Gardens</institution>, <addr-line>Kew, Richmond</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>EaStCHEM, School of Chemistry, University of St Andrews</institution>, <addr-line>St Andrews</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Gothenburg Global Biodiversity Centre, Department of Biological and Environmental Sciences, University of Gothenburg</institution>, <addr-line>Gothenburg</addr-line>, <country>Sweden</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Biology, University of Oxford</institution>, <addr-line>Oxford</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>UN Environment Programme World Conservation Monitoring Centre (UNEP-WCMC)</institution>, <addr-line>Cambridge</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>School of Biological, Earth and Environmental Sciences, University College Cork</institution>, <addr-line>Cork</addr-line>, <country>Ireland</country>
</aff>
<aff id="aff7">
<sup>7</sup>
<institution>Tropical Diversity Section, Royal Botanic Garden</institution>, <addr-line>Edinburgh</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff8">
<sup>8</sup>
<institution>Departamento de Biolog&#xed;a, Facultad de Ciencias Naturales, Universidad del Rosario</institution>, <addr-line>Bogot&#xe1;</addr-line>, <country>Colombia</country>
</aff>
<aff id="aff9">
<sup>9</sup>
<institution>Environmental Research Institute, University College Cork</institution>, <addr-line>Cork</addr-line>, <country>Ireland</country>
</aff>
<aff id="aff10">
<sup>10</sup>
<institution>Department of Biology, University of Fribourg</institution>, <addr-line>Fribourg</addr-line>, <country>Switzerland</country>
</aff>
<aff id="aff11">
<sup>11</sup>
<institution>Swiss Institute of Bioinformatics</institution>, <addr-line>Fribourg</addr-line>, <country>Switzerland</country>
</aff>
<aff id="aff12">
<sup>12</sup>
<institution>School of Pharmacy and Medical Sciences, University of Bradford</institution>, <addr-line>Bradford</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff13">
<sup>13</sup>
<institution>Institute of Pharmaceutical Science, King&#x2019;s College London, Franklin-Wilkins Building</institution>, <addr-line>London</addr-line>, <country>United Kingdom</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Kexuan Tang, Shanghai Jiao Tong University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Tariq Khan, University of Malakand, Pakistan; Ebiamadon Andi Brisibe, University of Calabar, Nigeria</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Adam Richard-Bollans, <email xlink:href="mailto:a.richard-bollans@kew.org">a.richard-bollans@kew.org</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>05</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1173328</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>02</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>04</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Richard-Bollans, Aitken, Antonelli, Bitencourt, Goyder, Lucas, Ondo, P&#xe9;rez-Escobar, Pironon, Richardson, Russell, Silvestro, Wright and Howes</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Richard-Bollans, Aitken, Antonelli, Bitencourt, Goyder, Lucas, Ondo, P&#xe9;rez-Escobar, Pironon, Richardson, Russell, Silvestro, Wright and Howes</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Plants are a rich source of bioactive compounds and a number of plant-derived antiplasmodial compounds have been developed into pharmaceutical drugs for the prevention and treatment of malaria, a major public health challenge. However, identifying plants with antiplasmodial potential can be time-consuming and costly. One approach for selecting plants to investigate is based on ethnobotanical knowledge which, though having provided some major successes, is restricted to a relatively small group of plant species. Machine learning, incorporating ethnobotanical and plant trait data, provides a promising approach to improve the identification of antiplasmodial plants and accelerate the search for new plant-derived antiplasmodial compounds. In this paper we present a novel dataset on antiplasmodial activity for three flowering plant families &#x2013; Apocynaceae, Loganiaceae and Rubiaceae (together comprising c. 21,100 species) &#x2013; and demonstrate the ability of machine learning algorithms to predict the antiplasmodial potential of plant species. We evaluate the predictive capability of a variety of algorithms &#x2013; Support Vector Machines, Logistic Regression, Gradient Boosted Trees and Bayesian Neural Networks &#x2013; and compare these to two ethnobotanical selection approaches &#x2013; based on usage as an antimalarial and general usage as a medicine. We evaluate the approaches using the given data and when the given samples are reweighted to correct for sampling biases. In both evaluation settings each of the machine learning models has a higher precision than the ethnobotanical approaches. In the bias-corrected scenario, the Support Vector classifier performs best &#x2013; attaining a mean precision of 0.67 compared to the best performing ethnobotanical approach with a mean precision of 0.46. We also use the bias correction method and the Support Vector classifier to estimate the potential of plants to provide novel antiplasmodial compounds. 
We estimate that 7677 species in Apocynaceae, Loganiaceae and Rubiaceae warrant further investigation and that at least 1300 active antiplasmodial species are highly unlikely to be investigated by conventional approaches. While traditional and Indigenous knowledge remains vital to our understanding of people-plant relationships and an invaluable source of information, these results indicate a vast and relatively untapped source in the search for new plant-derived antiplasmodial compounds.</p>
</abstract>
<kwd-group>
<kwd>malaria</kwd>
<kwd>traditional and indigenous knowledge</kwd>
<kwd>machine learning</kwd>
<kwd>botany</kwd>
<kwd>ethnobotany</kwd>
<kwd>sampling bias</kwd>
<kwd>antiplasmodial activity</kwd>
<kwd>ethnopharmacology</kwd>
</kwd-group>
<contract-num rid="cn001">PCEFP3 187012</contract-num>
<contract-num rid="cn002">2019-05191, 2019-04739</contract-num>
<contract-num rid="cn003">F 2022/1448</contract-num>
<contract-sponsor id="cn001">Schweizerischer Nationalfonds zur F&#xf6;rderung der Wissenschaftlichen Forschung<named-content content-type="fundref-id">10.13039/501100001711</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">Svenska Forskningsr&#xe5;det Formas<named-content content-type="fundref-id">10.13039/501100001862</named-content>
</contract-sponsor>
<contract-sponsor id="cn003">Stiftelsen f&#xf6;r Milj&#xf6;strategisk Forskning<named-content content-type="fundref-id">10.13039/100007633</named-content>
</contract-sponsor>
<contract-sponsor id="cn004">Royal Botanic Gardens, Kew<named-content content-type="fundref-id">10.13039/501100001296</named-content>
</contract-sponsor>
<counts>
<fig-count count="8"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="92"/>
<page-count count="14"/>
<word-count count="7581"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Plant Metabolism and Chemodiversity</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Malaria is a life-threatening disease that affected 247 million people globally in 2021, with a disproportionately high number of cases (95%) occurring in Africa (<xref ref-type="bibr" rid="B87">WHO, 2022b</xref>). Although global case incidence, deaths and mortality rates for malaria have fallen over the past two decades, this downward trend has plateaued since 2015 and there were an estimated 619,000 malaria deaths in 2021 (<xref ref-type="bibr" rid="B87">WHO, 2022b</xref>). The two main treatments for the most prominent malaria-causing species, <italic>Plasmodium falciparum</italic> and <italic>P. vivax</italic>, are chloroquine and artemisinin-based combination therapies (involving artemisinin or derivatives). In 2008, due to chloroquine resistance, the World Health Organization (WHO) recommended that <italic>P. falciparum</italic> infections should be treated with artemisinin-based combination therapies instead of chloroquine (<xref ref-type="bibr" rid="B83">WHO, 2008</xref>), but chloroquine resistance still persists (<xref ref-type="bibr" rid="B54">Ocan et&#xa0;al., 2019</xref>). Resistance to existing antimalarial drugs is an escalating challenge for eliminating malaria; indeed, there is concerning evidence of strains partially resistant to artemisinin emerging in Africa (<xref ref-type="bibr" rid="B79">Uwimana et&#xa0;al., 2020</xref>). As a result, the WHO recommends that research into antimalarial medicines should be accelerated as part of an effort to reach global malaria targets (<xref ref-type="bibr" rid="B87">WHO, 2022b</xref>).</p>
<p>Plants have provided or inspired the development of numerous pharmaceutical drugs (<xref ref-type="bibr" rid="B34">Howes et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B53">Newman and Cragg, 2020</xref>), including those on the WHO&#x2019;s Model List of Essential Medicines (<xref ref-type="bibr" rid="B85">WHO et&#xa0;al., 2021</xref>). In the context of malaria, both chloroquine and artemisinin are derived from plants &#x2013; chloroquine being a synthetic analogue of quinine, from <italic>Cinchona</italic> L. species (Rubiaceae: Gentianales) (<xref ref-type="bibr" rid="B45">Meshnick and Dobson, 2001</xref>) while artemisinin is extracted from sweet wormwood, <italic>Artemisia annua</italic> L. (Asteraceae: Asterales) (<xref ref-type="bibr" rid="B61">Qinghaosu Antimalaria Coordinating Research group, 1979</xref>). Furthermore, the antimalarial drug atovaquone was inspired by the chemical lapachol, which occurs in <italic>Tabebuia</italic> Gomes ex DC. species (Bignoniaceae: Lamiales) (<xref ref-type="bibr" rid="B48">Milliken et&#xa0;al., 2021</xref>). These are excellent examples of the natural solutions offered by plants and motivate the search for further plant-derived antimalarial drugs, particularly in the context of emerging resistance to existing antimalarials.</p>
<p>The predominant plant selection approach in the search for new antiplasmodial compounds has been an ethnobotanical one, that is, plants are investigated pharmacologically based on a history of traditional usage for malaria or other fever-causing diseases. This approach has provided some major successes, for example, the development of both quinine and artemisinin arose from traditional ethnobotanical knowledge (<xref ref-type="bibr" rid="B61">Qinghaosu Antimalaria Coordinating Research group, 1979</xref>; <xref ref-type="bibr" rid="B45">Meshnick and Dobson, 2001</xref>). However, this approach is restricted to a relatively small group of plant species and is limited in terms of reliability. It is therefore timely to assess whether emerging technologies, such as machine learning, could be used to more reliably harness the potential of plants as sources of new lead compounds for drug development.</p>
<p>Here we investigate the potential of three flowering plant families from the order Gentianales &#x2013; Apocynaceae, Loganiaceae and Rubiaceae &#x2013; selected based on numerous taxa being sources of chemically diverse alkaloids, a compound class of particular pharmaceutical relevance (<xref ref-type="bibr" rid="B17">Daley and Cordell, 2021</xref>). Some examples of antiplasmodial alkaloids from these families are given in <xref ref-type="fig" rid="f1">
<bold>Figure 1</bold>
</xref>. Antiplasmodial activity in these families has been relatively well studied, in part due to the presence of the potent antiplasmodial alkaloids, quinine and the isomer quinidine, from the <italic>Cinchona</italic> genus. The phytochemistry of these families has also been relatively well studied, including numerous reports on the presence of alkaloids, for example, (<xref ref-type="bibr" rid="B52">Muhammad et&#xa0;al., 2003</xref>; <xref ref-type="bibr" rid="B73">Suksamrarn et&#xa0;al., 2003</xref>; <xref ref-type="bibr" rid="B26">Federici et&#xa0;al., 2009</xref>; <xref ref-type="bibr" rid="B88">Wong et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B17">Daley and Cordell, 2021</xref>). Furthermore, from an ethnobotanical perspective, these families contain many species which are used traditionally to treat malaria (<xref ref-type="bibr" rid="B48">Milliken et&#xa0;al., 2021</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Examples of active antiplasmodial compounds in Apocynaceae, Loganiaceae and Rubiaceae. <bold>(A)</bold> Aspidocarpine from species in the genus <italic>Aspidosperma</italic> Mart. &amp; Zucc. (Apocynaceae). <bold>(B)</bold> Strychnogucine B found in species of the highly diversified genus, <italic>Strychnos</italic> L. (Loganiaceae). <bold>(C)</bold> Quinine, the well-known antimalarial, found in the genus <italic>Cinchona</italic> L. (Rubiaceae). Photos by C&#xe1;ssia Bitencourt <bold>(A)</bold>, Lucas Marinho <bold>(B)</bold> and Alexandre Antonelli <bold>(C)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1173328-g001.tif"/>
</fig>
<p>Our first aim is to assess whether machine learning models can be trained on plant trait data to predict the antiplasmodial activity of plants. To achieve this, we present a dataset for the three study plant families, quantifying the known antiplasmodial activity of species as well as a broad range of potentially salient predictors of activity, which we will use to train and test machine learning models. We compare the performance of the machine learning models with two ethnobotanical approaches. Our second aim is to highlight the potential of plants to provide novel antiplasmodial compounds. We address this by using the collected data to estimate the number of active antiplasmodial species in the three families and we also explore methods for correcting existing data biases, in order to infer a clearer picture of antiplasmodial activity in Apocynaceae, Loganiaceae and Rubiaceae.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Data collection</title>
<p>Here we use the term &#x2018;trait&#x2019; in a broad sense that encompasses a variety of plant properties and characteristics. We collected a wide range of traits including morphological, biochemical, environmental and geographic features, along with abstract features relating to medicinal usage and common knowledge of plant species. In the following, we provide detail of each of the collected traits. A summary of the collected data and detail of the data collection methods is given in the <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref>.</p>
<sec id="s2_1_1">
<label>2.1.1</label>
<title>Taxonomy</title>
<p>We extracted accepted names of all species of the three families according to the World Checklist of Vascular Plants (WCVP) V7 (<xref ref-type="bibr" rid="B28">Govaerts et&#xa0;al., 2021</xref>), totalling 21,111 species &#x2013; 6,495, 496 and 14,120 from Apocynaceae, Loganiaceae and Rubiaceae respectively. We use <italic>Genus</italic> and <italic>Family</italic> names as categorical traits.</p>
</sec>
<sec id="s2_1_2">
<label>2.1.2</label>
<title>Ethnobotanical data</title>
<p>Due to the documented link between traditional medicinal usage and bioactivity, evidenced in, for example, (<xref ref-type="bibr" rid="B41">Krettli, 2009</xref>), we collected binary traits documenting the presence and absence of known antimalarial usage (<italic>Antimalarial Use</italic>) and general medicinal usage (<italic>Medicinal</italic>). To compile these data we conducted a comprehensive literature review of medicinal usage in the three plant families, along with data provided by the Medicinal Plant Names Services (<xref ref-type="bibr" rid="B51">MPNS, 2022</xref>) and references to medicinal usage on the Plants of the World Online (<xref ref-type="bibr" rid="B60">POWO, 2022</xref>).</p>
<p>As an extension of the ethnobotanical data, we included binary traits to capture whether a plant is commonly known &#x2013; which we approximated by recording the presence of a Wikipedia<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref> page (<italic>Wiki Page</italic>) and the existence of a common name (<italic>Common Name</italic>). The existence of Wikipedia pages for species is determined by searching all species, subspecies and varieties (and their synonyms). Common name data are compiled from a variety of sources, outlined in the <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref>, with the majority of the data coming from MPNS and the United States Department of Agriculture Plants Database (<xref ref-type="bibr" rid="B78">USDA, 2022b</xref>).</p>
</sec>
<sec id="s2_1_3">
<label>2.1.3</label>
<title>Phytochemistry</title>
<p>There is much evidence of the pharmacological and pharmaceutical importance of plant-derived alkaloids (<xref ref-type="bibr" rid="B15">Cordell et&#xa0;al., 2001</xref>; <xref ref-type="bibr" rid="B22">Dey et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B34">Howes et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B17">Daley and Cordell, 2021</xref>) and we have therefore collected binary traits on their presence/absence. These data were collected through a comprehensive literature review as well as metabolite data compiled from KNApSAcK (<xref ref-type="bibr" rid="B1">Afendi et&#xa0;al., 2012</xref>).</p>
<p>Though the coverage of the alkaloid data is relatively good (980 species with reported presence or absence), for the vast majority (97%) of these species, reports indicate a presence of alkaloids compared to 3% where alkaloids are absent. This may be the result of reporting bias, where publications are focused on species found to contain alkaloids and absences of alkaloids are not published. To assess the prevalence of the reporting bias, we contacted 11 authors of papers after the year 2000 that solely reported presences to ask if they had found any absences which they did not publish. We received responses from three authors detailing three species where alkaloids had been tested for and not found. Rather than being an issue of reporting bias, it may be the case that the vast majority of species in these families produce alkaloids. There is some evidence for this from studies testing large numbers of species for alkaloids where both presences and absences are reported e.g. (<xref ref-type="bibr" rid="B72">Soto-Sobenis et&#xa0;al., 2001</xref>). In either case, current data on the presence of alkaloids are relatively uninformative and so are not included in the following analysis. Instead, we use the collected data on alkaloids to catalogue which species have been tested for alkaloids. We use these data to create a binary trait (<italic>Tested for Alkaloids</italic>) which we use to analyse the relationship between phytochemical knowledge and knowledge of antiplasmodial activity.</p>
<p>An important plant trait indicating potent bioactivity is the degree to which a plant is toxic. As we aim to capture bioactivity in a broad sense, we compiled data on toxicity to any vertebrate and invertebrate animals. We have included this as a binary trait (<italic>Poisonous</italic>). Poison data were compiled from numerous sources detailing plants considered to be poisonous, outlined in the <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref>, with the majority of the data coming from the LitTox resource (<xref ref-type="bibr" rid="B66">Royal Botanic Gardens, Kew, 2021</xref>).</p>
</sec>
<sec id="s2_1_4">
<label>2.1.4</label>
<title>Morphology</title>
<p>As a major putative role of certain phytochemicals is to protect plants from herbivores (<xref ref-type="bibr" rid="B44">Maldonado et&#xa0;al., 2017</xref>), it is plausible that other defence mechanisms have a relation to bioactivity. Furthermore, certain biologically active compounds (e.g. some diterpene alkaloids) are biosynthesised in particular morphological structures (e.g. plant trichomes/hairs) of certain plants (<xref ref-type="bibr" rid="B76">Tomlinson et&#xa0;al., 2022</xref>). Here we assess the presence of emergences (hairs or spines) which we include as a binary trait (<italic>Emergence</italic>). Emergence data have been collated by Gentianales specialists, supplemented by the TRY plant trait database (<xref ref-type="bibr" rid="B39">Kattge et&#xa0;al., 2020</xref>) and POWO.</p>
<p>Another morphological trait we consider is plant life-form, which may correspond to occurrences of specific phytochemicals (<xref ref-type="bibr" rid="B20">de Almeida et&#xa0;al., 2005</xref>). To facilitate collection and coverage of morphological data, and as life-forms and presence of emergences are often well conserved within genera in these families, we include these traits by using the predominant state at the genus level. As multiple life-forms may appear within a single genus, the life-form data are one-hot encoded giving a set of binary traits (<italic>herb</italic>, <italic>liana</italic>, <italic>succulent</italic>, <italic>shrub</italic>, <italic>subshrub</italic>, <italic>tree</italic>). Life-form data were initially retrieved from the WCVP and Flora do Brasil (<xref ref-type="bibr" rid="B35">Jardim Bot&#xe2;nico do Rio de Janeiro, 2022</xref>), then reviewed and modified by Gentianales specialists.</p>
</sec>
<sec id="s2_1_5">
<label>2.1.5</label>
<title>Geographic regions with malaria</title>
<p>To examine the relationship between prevalence of malaria in a given geographic area and the number of tested species, we collected data indicating which species are found in regions where malaria transmission occurs. We identified those regions from various sources, including the World Health Organization Database (<xref ref-type="bibr" rid="B86">WHO, 2022a</xref>) and the World Bank Development Indicators (<xref ref-type="bibr" rid="B75">The World Bank, 2022</xref>) (see <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref> for full details). Regions indicated in these sources were then mapped onto the World Geographical Scheme for Recording Plant Distributions (Level 3) (<xref ref-type="bibr" rid="B12">Brummitt et&#xa0;al., 2001</xref>). We then used the WCVP distribution data to identify which species occur in these malarial regions (either native or introduced), assigned to each species as a binary trait <italic>In Malarial Region</italic>.</p>
</sec>
<sec id="s2_1_6">
<label>2.1.6</label>
<title>Environmental</title>
<p>There is some evidence of environmental impacts on bioactive metabolite concentrations and diversity, for example, (<xref ref-type="bibr" rid="B21">Defossez et&#xa0;al., 2021</xref>). To characterise the environmental niche of species, we followed the methodology of <xref ref-type="bibr" rid="B92">Zu et&#xa0;al. (2021)</xref>. We first extracted geographic occurrence records from the Global Biodiversity Information Facility (GBIF)<xref ref-type="fn" rid="fn2">
<sup>2</sup>
</xref> for each species using the rgbif package (<xref ref-type="bibr" rid="B13">Chamberlain et&#xa0;al., 2022</xref>) in R. Occurrence data from GBIF contain many inconsistencies (<xref ref-type="bibr" rid="B46">Meyer et&#xa0;al., 2016</xref>). Initially we cleaned the data by removing: records collected before 1945, records with no given coordinates or impossible coordinates, records with coordinate uncertainty over 20&#xa0;km, records with rounded coordinates and records where the quantity of species occurrences (individual counts) is zero. Next, using the CoordinateCleaner package in R (<xref ref-type="bibr" rid="B91">Zizka et&#xa0;al., 2019</xref>) we removed: records with zero longitude or latitude, records with equal longitude and latitude, records outside reported country, records within country or province centroids, records in country capitals, records with institutional coordinates and records with GBIF headquarters coordinates. Finally, we discarded occurrences where species were reported to be outside of their native or introduced botanical regions according to the WCVP.</p>
<p>We quantified species&#x2019; environmental conditions using a set of 17 soil, climate, and topographic variables essential to plant survival, growth and reproduction. We extracted five soil traits from the SoilGrids database (<xref ref-type="bibr" rid="B29">Hengl et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B59">Poggio et&#xa0;al., 2021</xref>): <italic>nitrogen content</italic>, <italic>pH</italic>, organic carbon stock (<italic>ocs</italic>) and <italic>water capacity</italic>, each averaged over a 30&#xa0;cm depth, and <italic>soil depth</italic> to bedrock. The eight bioclimatic traits we used were <italic>bio1</italic>, <italic>bio4</italic>, <italic>bio10</italic>, <italic>bio11</italic>, <italic>bio12</italic>, <italic>bio15</italic>, <italic>bio16</italic> and <italic>bio17</italic>, representing temperature (mean annual, seasonality, daily mean of the warmest quarter, daily mean of the coldest quarter) and precipitation (annual amount, seasonality, mean monthly amount of the wettest quarter, mean monthly amount of the driest quarter). These were extracted from the CHELSA database V2.1 (<xref ref-type="bibr" rid="B37">Karger et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B38">Karger et&#xa0;al., 2021</xref>). We also extracted the K&#xf6;ppen-Geiger climate classification (<italic>kg mode</italic>) from GloH2O (<xref ref-type="bibr" rid="B5">Beck et&#xa0;al., 2018</xref>). <italic>Elevation</italic> and <italic>breakline elevation</italic> were extracted from GMTED2010 (<xref ref-type="bibr" rid="B18">Danielson and Gesch, 2011</xref>) and <italic>slope</italic> was calculated from the elevation data using the terra package in R (<xref ref-type="bibr" rid="B30">Hijmans, 2022</xref>).</p>
<p>To match the resolution of the occurrence records, all environmental rasters were upscaled to 10 arc-minutes (c. 20 km) using the aggregate function of the terra package and environmental traits were extracted for each species occurrence using the extract function. For the continuous traits, median values were then calculated across all occurrences of each species and for the categorical variable <italic>kg mode</italic> the mode of all occurrences of each species was used. To capture coarse spatial information we also included median <italic>latitude</italic> and <italic>longitude</italic> for each species, calculated from the occurrence records.</p>
</sec>
<sec id="s2_1_7">
<label>2.1.7</label>
<title>Classifying activity</title>
<p>To generate a comprehensive dataset of antiplasmodial activity, we conducted a thorough literature review for details of antiplasmodial tests in Apocynaceae, Loganiaceae and Rubiaceae and assigned activity labels to species based on the available reports of <italic>in vitro</italic> and <italic>in vivo</italic> studies. As with many biological datasets (<xref ref-type="bibr" rid="B6">Bender and Cortes-Ciriano, 2021</xref>), providing class labels is a nontrivial problem as there are many variations on the experiments and methods used for reporting activity. A detailed summary of the designated classification scheme we chose is given in the <xref ref-type="supplementary-material" rid="SM2">
<bold>Supplementary Material</bold>
</xref>. In general, for <italic>in vitro</italic> studies testing activity against <italic>Plasmodium</italic> parasites, the potency of IC50 values for crude extracts follows the definitions given in (<xref ref-type="bibr" rid="B63">Rasoanaivo et&#xa0;al., 2004a</xref>) i.e. &lt; 10<italic>&#x3bc;g</italic>/<italic>ml</italic> is <italic>active</italic> and &#x2265; 10<italic>&#x3bc;g</italic>/<italic>ml</italic> is <italic>inactive</italic>. For tests of isolated compounds, according to the Medicines for Malaria Venture<xref ref-type="fn" rid="fn3">
<sup>3</sup>
</xref> compounds with IC50 values &lt; 1 &#xb5;M are designated as active and of interest for further investigation, thus we use this threshold in our data. For fractions, we use a threshold of 5 <italic>&#x3bc;g</italic>/<italic>ml</italic>, which in general corresponds with published author decisions of activity categories. For <italic>in vivo</italic> studies, we use the published author decisions regarding activity.</p>
</sec>
<sec id="s2_1_8">
<label>2.1.8</label>
<title>(Pseudo)absences</title>
<p>For some traits and datasets, presences are commonly reported but absences are not. For example, there are various datasets listing poisonous plants but published data on &#x2018;safe&#x2019; plants are sparse. In many cases, this is likely a result of reporting bias; however, there are multiple possible reasons for this. For certain traits there are presence biases, e.g. in the case of poisons, once a plant has been found to be poisonous it can be reported as such; however, if a plant is assessed for its toxicity, there are various caveats which limit the ability to confidently say the plant is safe. Examples of such caveats include the effect of extraction or preparation method on toxicity, the specific plant part tested, and which organisms the plant is toxic to. These variables exist in addition to methodological differences in assessing toxicity and the fact that <italic>in vitro</italic> studies may not correlate with effects <italic>in vivo</italic> (<xref ref-type="bibr" rid="B33">Houghton et&#xa0;al., 2007</xref>).</p>
<p>Where missing data give a strong indication of a genuine absence, i.e. for <italic>Common Name</italic>, <italic>Poisonous</italic>, <italic>Medicinal</italic>, <italic>Wiki Page</italic>, <italic>Antimalarial Use</italic>, <italic>Emergence</italic>, we take these pseudoabsences to be absences and fill missing values with 0. Missing values for other traits are left as NA and, where necessary, will be imputed.</p>
</sec>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Analysing and correcting sampling bias</title>
<p>An obstacle to our analysis is the significant sampling bias in the data. In part this has been created by the <italic>ethnobotanical approach</italic> to drug discovery. In this approach, researchers carry out (or rely on) ethnobotanical surveys that document traditional medicinal uses of plants. Plants used traditionally for malaria are then investigated to determine whether there is any scientific basis (e.g. antiplasmodial activity) that could explain the traditional use. As a result, plants traditionally used for malaria are significantly over-represented in the data on antiplasmodial activity of plant species.</p>
<p>In this section we outline the methods used to evidence the existence of the sampling biases as well as a method we use for correcting sampling bias, which may allow for a better picture of antiplasmodial activity and may be applied when training and evaluating machine learning models. Throughout this paper we use <italic>labelled</italic> to indicate species which have been classified as <italic>Active</italic> or <italic>Inactive</italic> following the scheme described in Section 2.1.7. We use <italic>unlabelled</italic> to indicate species with unknown antiplasmodial activity. The <italic>underlying population</italic> refers to all species in Apocynaceae, Loganiaceae and Rubiaceae.</p>
<p>Firstly, we compare the labelled data with the underlying population by highlighting common choices made by researchers when selecting plants to test for antiplasmodial activity. We then statistically verify the differences using the Chi-squared test (<xref ref-type="bibr" rid="B55">Pearson, 1900</xref>) for the discrete traits and the Kolmogorov&#x2013;Smirnov 2-Sample test (<xref ref-type="bibr" rid="B70">Smirnov, 1939</xref>) for the continuous traits. In order to account for the repetition of multiple tests and the associated family-wise error rate, we adjust the significance thresholds using the Holm-Bonferroni method (<xref ref-type="bibr" rid="B31">Holm, 1979</xref>).</p>
<p>Before describing the bias correction method we have implemented, we first outline our assumptions about the nature of the bias. Let <italic>s</italic> be a binary variable denoting the sampling decision i.e. 1 indicates a sample is in the labelled data and 0 indicates a sample is unlabelled. Given a species with traits <italic>x</italic> and activity label <italic>y</italic>, we assume that the sampling decision, <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, is independent of <italic>y</italic> given <italic>x</italic>, <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> i.e. plants are tested without <italic>a priori</italic> knowledge of their activity, <italic>y</italic>, but based on traits, <italic>x</italic>, that might increase the probability of active compounds compared to random sampling. This is commonly known as the missing at random (MAR) assumption (<xref ref-type="bibr" rid="B90">Zadrozny, 2004</xref>).</p>
<p>As described by <xref ref-type="bibr" rid="B16">Cortes et&#xa0;al. (2008)</xref>, we can correct for sampling bias by reweighting the sampled (labelled) data using the inverse of the sampling probability for each sample, <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
<xref ref-type="fn" rid="fn4">
<sup>4</sup>
</xref>, a technique often referred to as Inverse Probability Weighting. Under this procedure, the reweighted data will resemble the underlying population if <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is accurately estimated. As an example in the context of the current study, species which are traditionally used for malaria have a relatively high probability of being tested and as a result are over-represented in the available sample i.e. <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is large for these species and so the assigned weight is small.</p>
<p>To predict <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, we use a regularised Logistic Regression model, implemented in the scikit-learn Python library (<xref ref-type="bibr" rid="B56">Pedregosa et&#xa0;al., 2011</xref>) which we refer to as the Correction Model. We use such a model to limit overfitting and as Logistic Regression models are generally well calibrated. Given a sample (labelled) dataset and underlying population, instances in the sample dataset are labelled <italic>s</italic> = 1 and instances not in the sample are labelled <italic>s</italic> = 0. The Correction Model is trained to predict <italic>s</italic> from the given traits such that, assuming good calibration, the probability estimates given by the model correspond to <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Prior to training the model, the categorical traits <italic>Genus</italic>, <italic>Family</italic> and <italic>kg mode</italic> are target encoded in the preprocessing step using the category_encoders library (<xref ref-type="bibr" rid="B47">Micci-Barreca, 2001</xref>). The traits are then scaled by removing the mean and scaling to unit variance. Finally, we use the scikit-learn (<xref ref-type="bibr" rid="B56">Pedregosa et&#xa0;al., 2011</xref>) k-Nearest Neighbor imputer to impute any missing values. Missing values of a trait from a given sample are imputed by assigning the mean trait value of the five samples nearest to the given sample, where nearness between two samples is measured with the Euclidean distance using the traits that neither sample is missing.</p>
<p>To verify the accuracy of this bias correction approach, we calculated the mean Brier score (<xref ref-type="bibr" rid="B11">Brier, 1950</xref>) of the predicted probabilities in 10 iterations of 10-fold stratified cross validation. The Brier Score measures the difference between the predicted probability given by the model and the actual label (<italic>s</italic> = 0 or 1). We also visualise the accuracy of the bias correction approach by comparing the means of the traits in the labelled data, underlying population and the bias-corrected labelled data.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Machine learning models</title>
<p>To explore the success of different plant selection approaches and motivate a machine learning based approach to the problem, we train Support Vector (SVC), Logistic Regression (Logit) (<xref ref-type="bibr" rid="B56">Pedregosa et&#xa0;al., 2011</xref>), XGBoost (XGB) (<xref ref-type="bibr" rid="B14">Chen and Guestrin, 2016</xref>) and Bayesian Neural Network (BNN) (<xref ref-type="bibr" rid="B68">Silvestro and Andermann, 2020</xref>) classifiers and compare these with two ethnobotanical approaches: selection based on traditional antimalarial use and selection based on traditional medicinal use not specific for malaria.</p>
<p>As the cost of false positives is relatively high &#x2013; resources will be misallocated in trying to find antiplasmodial compounds in inactive species &#x2013; we aim to maximise <italic>precision</italic> of the models i.e. the proportion of species which are predicted to be active that are correctly predicted. Of course, <italic>recall</italic> (the proportion of active species predicted to be active) is still important as a large list of antiplasmodial species provides more opportunities for finding new antiplasmodial compounds. However, even with very low recall the models will still generate very large lists of antiplasmodial species from the 21,111 species in Apocynaceae, Loganiaceae and Rubiaceae. As a result, we aim to maximise the F-score with <italic>&#x3b2;</italic> = 0.5 (<italic>F</italic>
<sub>0.5</sub>), i.e. the harmonic mean of precision and recall with more importance given to precision. We evaluate the models with this score along with precision, and also provide precision-recall curves.</p>
<p>We evaluate the models using 10 iterations of 10-fold stratified cross validation in two settings. Firstly, we analyse model performance in the usual case, where the models are trained and tested on folds of the given data. We also attempt to estimate model performance on the underlying population by assigning sample weights to the labelled data, using the method discussed in Section 2.2, such that the given labelled data is more representative of the underlying population. In this case, sample weights are used in both training and testing.</p>
<sec id="s2_3_1">
<label>2.3.1</label>
<title>Preprocessing</title>
<p>In the preprocessing step, the categorical traits <italic>Genus</italic>, <italic>Family</italic> and <italic>kg mode</italic> are target encoded. The traits are then scaled by removing the mean and scaling to unit variance. We then use the scikit-learn (<xref ref-type="bibr" rid="B56">Pedregosa et&#xa0;al., 2011</xref>) k-Nearest Neighbor imputer, trained using the training data and the unlabelled data, to impute any missing values. Finally, we use Principal Component Analysis (PCA), implemented in scikit-learn, to reduce the dimensionality of the highly collinear continuous environmental traits. The PCA is trained using the training data and unlabelled data and the number of components used in the PCA is selected such that at least 80% of the variance is explained by the components. The traits <italic>In Malarial Region</italic> and <italic>Tested for Alkaloids</italic> were collected for the analysis of sampling bias rather than as predictive traits and so are not included in the machine learning models.</p>
</sec>
<sec id="s2_3_2">
<label>2.3.2</label>
<title>Training</title>
<p>The Logit, SVC and XGB classifiers are trained as follows. Given a set of training folds and a test fold, hyperparameters of the models are tuned via cross validation on the training data using GridSearchCV (<xref ref-type="bibr" rid="B56">Pedregosa et&#xa0;al., 2011</xref>). In this step, <italic>F</italic>
<sub>0.5</sub> is used as the evaluation metric and we tune a basic list of hyperparameters in order to minimise under/overfitting and to maximise <italic>F</italic>
<sub>0.5</sub>. For the Logit and SVC classifiers, we tune the regularisation parameter C, as well as the class_weight parameter. For the XGB classifier, we tune the max_depth parameter. Once the best hyperparameters for the models have been generated the models are retrained on all the given training data (with/without sample weights depending on the evaluation setting).</p>
<p>For the BNN classifier, we use two layers of 10 and 5 nodes, respectively, and tanh activation function. We train the model through 100,000 Markov chain Monte Carlo iterations, as implemented in npBNN (<xref ref-type="bibr" rid="B68">Silvestro and Andermann, 2020</xref>), with/without sample weights depending on the evaluation setting. We use 1,000 posterior samples of the parameters when generating predictions.</p>
</sec>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Assessing activity in the study families</title>
<p>In order to motivate further exploration of these three plant families as potential sources of new pharmaceuticals, we use the collected data to estimate the antiplasmodial activity of the families in two ways. Firstly, we summarise the proportion of active species in each family using the collected labelled data. As this is likely to be unrepresentative due to the sampling biases, we also provide a summary of the labelled data when the bias is corrected using the method discussed in Section 2.2.</p>
<p>We also use the estimation of <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, discussed in Section 2.2, to analyse the existing sampling decision and highlight the wealth of potentially active species that are currently overlooked. First, we compare <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> for the known active and inactive species in the labelled data. We then analyse species that are highly unlikely to be tested according to the existing sampling decision and we take these to be species for which <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is below the median value in the unlabelled data. We check the known activity of these species and use the machine learning model with the highest precision to provide a conservative estimate of how many of these species are active in the underlying population. The estimate of the number of active species given by the model is corrected using an estimate of the model precision. The model precision estimate is generated from the mean precision given in the cross-validation evaluation and we calculate a 95% bootstrap confidence interval from the precision scores given in each fold of the cross-validation.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Data summary</title>
<sec id="s3_1_1">
<label>3.1.1</label>
<title>Labelled data</title>
<p>Following the scheme for classifying activity described in Section 2.1.7, we designated 132 species as active and 150 species as inactive, providing 282 labelled species from the 21,111 species in Apocynaceae, Loganiaceae and Rubiaceae. In these labelled data, all species are given trait values for each of the traits except for those trait values which rely on GBIF occurrence records, where data are missing for five species.</p>
</sec>
<sec id="s3_1_2">
<label>3.1.2</label>
<title>Trait relations</title>
<p>
<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref> provides a brief overview of the collected data, summarising relationships between some of the traits from all the collected data. The heatmap gives a visualisation of the co-occurrences of the binary traits, and the given values correspond to the mean values of traits in the <italic>y</italic> axis when traits in the <italic>x</italic> axis are present, while &#x2018;All Species&#x2019; provides a comparison with mean values of traits in the underlying population. For example, 1% of all species are used traditionally for malaria while 18% of poisonous species are used traditionally for malaria. Similarly 10% of all species are used as traditional medicines while 77% of poisonous species are used as medicines. With regards to activity, the first column provides mean trait values for active species which indicate stark differences with the underlying population (e.g. 52% of active species are poisonous compared to 3% in the underlying population). However, these differences are more a reflection of the sampling biases rather than any strong relationships between the traits and antiplasmodial activity.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Co-occurrence heatmap summarising collected binary traits.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1173328-g002.tif"/>
</fig>
</sec>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Sampling bias</title>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Comparing the labelled data with the underlying population</title>
<p>The most common feature motivating the selection of plants to test for antiplasmodial activity is traditional knowledge of use for malaria, for example (<xref ref-type="bibr" rid="B3">Andrade-Neto et&#xa0;al., 2003</xref>; <xref ref-type="bibr" rid="B9">Bourdy et&#xa0;al., 2004</xref>; <xref ref-type="bibr" rid="B7">Bertania et&#xa0;al., 2005</xref>; <xref ref-type="bibr" rid="B62">Ramalhete et&#xa0;al., 2008</xref>; <xref ref-type="bibr" rid="B25">Ezike et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B74">Taek et&#xa0;al., 2021</xref>). We found that 48% of labelled species are traditionally used for malaria while only 1% of species in the underlying population are traditionally used for malaria. Similarly, plants are frequently tested based on more general traditional medicinal usage (not specific to malaria), e.g. (<xref ref-type="bibr" rid="B40">Kaushik et&#xa0;al., 2013</xref>; <xref ref-type="bibr" rid="B50">Mothana et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B69">Singh et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B67">Satish et&#xa0;al., 2017</xref>). 77% of labelled species are traditionally used as medicines while 10% of species in the underlying population are traditionally used as medicines.</p>
<p>As previous successes in finding plants with antiplasmodial activity have linked their alkaloid content to the antiplasmodial activity, tests of antiplasmodial activity are often conducted on plants known/expected to contain alkaloids, for example (<xref ref-type="bibr" rid="B89">Wright et&#xa0;al., 1992</xref>; <xref ref-type="bibr" rid="B71">Solis et&#xa0;al., 1995</xref>; <xref ref-type="bibr" rid="B43">Likhitwitayawuid et&#xa0;al., 1999</xref>; <xref ref-type="bibr" rid="B82">Weniger et&#xa0;al., 2001</xref>; <xref ref-type="bibr" rid="B49">Mitaine-Offer et&#xa0;al., 2002</xref>; <xref ref-type="bibr" rid="B26">Federici et&#xa0;al., 2009</xref>). Moreover, in many reports where plants are tested for antiplasmodial activity, those studies also include tests for (and find) alkaloids e.g. (<xref ref-type="bibr" rid="B43">Likhitwitayawuid et&#xa0;al., 1999</xref>; <xref ref-type="bibr" rid="B52">Muhammad et&#xa0;al., 2003</xref>; <xref ref-type="bibr" rid="B73">Suksamrarn et&#xa0;al., 2003</xref>; <xref ref-type="bibr" rid="B88">Wong et&#xa0;al., 2011</xref>). As a result, 69% of labelled species and 82% of active species have been tested for presence of alkaloids, while only 5% of species in the underlying population have been tested for presence of alkaloids.</p>
<p>Another potential factor influencing sampling is the geographic location of species, i.e. plants occurring in regions with malaria are commonly selected to test for antiplasmodial activity, for example (<xref ref-type="bibr" rid="B64">Rasoanaivo et&#xa0;al., 2004b</xref>; <xref ref-type="bibr" rid="B7">Bertania et&#xa0;al., 2005</xref>; <xref ref-type="bibr" rid="B2">Al-Musayeib et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B36">Kantamreddi and Wright, 2012</xref>; <xref ref-type="bibr" rid="B74">Taek et&#xa0;al., 2021</xref>). As a result, 99% of labelled species are found in malarial regions compared to 89% in the underlying population. In fact, there is only one tested species which is not found in a malarial region (<italic>Gardenia urvillei</italic> Montrouz. (Rubiaceae) which is native to New Caledonia) and three <italic>Ochrosia</italic> Juss. (Apocynaceae) species (native to Fiji, Tonga and New Caledonia) whose activity is known through the presence of antiplasmodial compounds (not themselves explicitly tested) which are not found in malarial regions.</p>
<p>It is also common to test plants taxonomically related to known antiplasmodial plants (<xref ref-type="bibr" rid="B81">Weenen et&#xa0;al., 1990</xref>; <xref ref-type="bibr" rid="B27">Fr&#xe9;d&#xe9;rich et&#xa0;al., 2002</xref>; <xref ref-type="bibr" rid="B58">Philippe et&#xa0;al., 2005</xref>; <xref ref-type="bibr" rid="B23">dos Santos Torres et&#xa0;al., 2013</xref>; <xref ref-type="bibr" rid="B10">Brand&#xe3;o et&#xa0;al., 2020</xref>). For example, some genera known to contain active species are frequently tested e.g. <italic>Aspidosperma</italic> (Apocynaceae: Gentianales) (18 labelled species) and <italic>Strychnos</italic> (Loganiaceae: Gentianales) (36 labelled species).</p>
<p>For almost all the quantitative traits, the difference between the labelled data and underlying population (as measured by Chi-squared test for the discrete traits and the Kolmogorov&#x2013;Smirnov 2-Sample test for the continuous traits) is significant (corrected p values &lt; 0.05) with the exception of life-forms (lianas and succulents). The most diverging traits are <italic>Antimalarial Use</italic> and <italic>Tested for Alkaloids</italic> (corrected p values = 0, Chi-squared statistic 3137 and 2218 respectively). We can therefore conclude that the labelled data significantly differ from the underlying population. Overall, it is apparent that the approaches used to select plants for antiplasmodial tests have biased the available data on antiplasmodial activity.</p>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Bias correction</title>
<p>When testing the Correction Model in 10 iterations of 10-fold stratified cross validation, the mean Brier score was 0.0097 (SD = 0.001), indicating an accurate fit to the data and so, a reliable prediction of the selection probability. A visual comparison of the bias-corrected data and the underlying population is given in <xref ref-type="fig" rid="f3">
<bold>Figures&#xa0;3</bold>
</xref>, <xref ref-type="fig" rid="f4">
<bold>4</bold>
</xref>. For readability, the mean values of the continuous traits are rescaled between 0 and 1 using the MinMaxScaler from scikit-learn (<xref ref-type="bibr" rid="B56">Pedregosa et&#xa0;al., 2011</xref>). We can see that for the majority of the traits, the mean values of the corrected data closely resemble the underlying population compared to the values in the labelled data.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Mean values of binary traits in biased and corrected datasets.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1173328-g003.tif"/>
</fig>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Scaled mean values of continuous traits in biased and corrected datasets.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1173328-g004.tif"/>
</fig>
</sec>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Comparing plant selection approaches</title>
<p>Given the quantification of antiplasmodial activity, we may now analyse the effectiveness of different approaches for plant selection &#x2013; random selection, selection based on traditional antimalarial use (Ethno (M)) and selection based on general traditional medicinal use not specific for malaria (Ethno (G)). <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref> provides a summary of the precisions of these methods on the biased and corrected datasets. When plants are selected based on a history of use for malaria or general medicinal usage, they are more likely to be active than selecting plants at random (both in the biased and corrected cases). This result provides some validation for the ethnopharmacological approach and agrees with the findings of (<xref ref-type="bibr" rid="B42">Krettli et&#xa0;al., 2001</xref>). However, in Apocynaceae, Loganiaceae and Rubiaceae, only 281 species have a history of antimalarial usage and 2109 have a history of general medicinal usage which limits the search for new compounds to a relatively small group of plants.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Precision of selection strategies.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="center">Uncorrected</th>
<th valign="top" align="center">Corrected</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">
<bold>Random</bold>
</td>
<td valign="top" align="left">0.47</td>
<td valign="top" align="left">0.36</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Ethno (G)</bold>
</td>
<td valign="top" align="left">0.50</td>
<td valign="top" align="left">0.42</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Ethno (M)</bold>
</td>
<td valign="top" align="left">0.56</td>
<td valign="top" align="left">0.42</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Considering the sampling decision more generally, in the uncorrected case, the value for the &#x2018;Random&#x2019; approach reflects the mean activity of all tested species and provides some quantification of the overall precision of the existing plant selection approach i.e. species selected for testing by researchers have a probability of being active of 0.47, while the estimate of the mean activity of the underlying population is 0.36. Similarly, the mean value of <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> for active species in the labelled dataset is 0.53, while for inactive species this value is 0.31.</p>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Machine learning evaluation</title>
<sec id="s3_3_1_1">
<label>3.3.1.1</label>
<title>Without bias correction</title>
<p>In <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>, we see the performance of the machine learning models compared to the two ethnobotanical approaches. Overall the mean scores of the machine learning models improve on both approaches and indicate that antiplasmodial activity can be predicted relatively accurately from the collected traits (mean precisions &#x2013; BNN: 0.66, XGB: 0.66, Logit: 0.62, SVC: 0.65, Ethno (M): 0.57, Ethno (G): 0.50). The Precision-Recall curves in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>, generated using all test instances in the cross validation, show how varying the classifier thresholds can improve precision at the cost of recall, for example, by increasing the threshold of the models we can achieve a precision of over 0.8 with a recall of approximately 0.2.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Model performance in stratified cross validation without bias correction.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1173328-g005.tif"/>
</fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Precision-Recall Curves in stratified cross validation without bias correction.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1173328-g006.tif"/>
</fig>
</sec>
<sec id="s3_3_1_2">
<label>3.3.1.2</label>
<title>Corrected performance</title>
<p>
<xref ref-type="fig" rid="f7">
<bold>Figures&#xa0;7</bold>
</xref>, <xref ref-type="fig" rid="f8">
<bold>8</bold>
</xref> show the estimated performance of the models on the underlying population. Again, though there is higher variance in model performance due to the weights used on the train and test samples, the machine learning models improve on the ethnobotanical approaches. Moreover, above we estimated that the precision of the existing plant selection approach of the field as a whole was 0.47, and our models again compare well with this (mean precisions &#x2013; BNN: 0.59, XGB: 0.63, Logit: 0.66, SVC: 0.67).</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Model performance in stratified cross validation with bias correction of training and testing samples.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1173328-g007.tif"/>
</fig>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Precision-Recall Curves in stratified cross validation with bias correction of training and testing samples.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1173328-g008.tif"/>
</fig>
</sec>
</sec>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Antiplasmodial potential of Apocynaceae, Loganiaceae and Rubiaceae</title>
<p>In <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, we provide a summary of the proportion of active species in each family. The given labelled data suggest a high level of activity in the families (47%), though when we estimate the activity of the underlying population by correcting for the sampling biases, the proportion is lower (36%). Nevertheless, this estimate indicates that there are approximately 7677 species in these families that may warrant further investigation.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Estimated proportions of active species.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="center">Uncorrected</th>
<th valign="top" align="center">Corrected</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">
<bold>Apocynaceae</bold>
</td>
<td valign="top" align="left">0.57</td>
<td valign="top" align="left">0.51</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Loganiaceae</bold>
</td>
<td valign="top" align="left">0.30</td>
<td valign="top" align="left">0.12</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Rubiaceae</bold>
</td>
<td valign="top" align="left">0.41</td>
<td valign="top" align="left">0.34</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>All</bold>
</td>
<td valign="top" align="left">0.47</td>
<td valign="top" align="left">0.36</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s3_4_1">
<label>3.4.1</label>
<title>Surprises</title>
<p>For those species that we deem highly unlikely to be tested (<inline-formula>
<mml:math display="inline" id="im13">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>&lt;</mml:mo>
<mml:mn>0.0014</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>), only 2 such species are in the labelled data, one of which is known to be active, while 9997 are in the unlabelled data. When the SVC model is trained on all the available data and used to predict the activity of these species in the unlabelled data, 2358 are estimated to be active. This gives a 95% confidence interval of 1300 &#x2013; 1522 active species when the model precision is accounted for. Note that this is a conservative approximation as we are only considering species that the model predicts to be active and correcting for the estimated false positives. However, as visible in the Precision-Recall curves, recall of the models is not perfect and it is highly likely that there are also a significant number of species that the model predicts to be inactive which are in fact active.</p>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In this study we have shown that machine learning models based on plant traits can be effective at selecting active antiplasmodial plants. Moreover, as the machine learning models output a classification confidence for each sample, researchers searching for active species may select samples which are labelled as active with most confidence by the models. The Precision-Recall curves in Section 3.3.1 indicate that such an approach could yield a large number of active species with a precision of at least 0.8.</p>
<p>We have also extensively considered sampling biases in the data, an issue that exists in botany (<xref ref-type="bibr" rid="B46">Meyer et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B80">Visscher et&#xa0;al., 2022</xref>) and biological sciences more generally (<xref ref-type="bibr" rid="B6">Bender and Cortes-Ciriano, 2021</xref>). We have used a bias correction method to provide a more accurate representation of the properties of the underlying data and a more robust evaluation of plant selection methods. We hope that by tackling sampling bias in our particular context we raise awareness of this issue in botany more widely and highlight potential solutions to this problem.</p>
<p>Our results suggest that there are a large number of species (approximately 7677) in Apocynaceae, Loganiaceae and Rubiaceae with antiplasmodial potential while only 281 species have a history of antimalarial usage. Furthermore, of those species we deem highly unlikely to be investigated, we estimate at least 1300 untested species to be active. These results indicate a vast and relatively untapped source to accelerate the search for new plant-derived antiplasmodial compounds.</p>
<p>We have so far explored the potential of machine learning in predicting antiplasmodial activity. However, activity is not the only metric to evaluate useful medicinal plants. For example, useful active compounds found in plants will ideally also be more selective for <italic>Plasmodium</italic> parasites and less toxic to human cells. Plants used traditionally as oral preparations, which have a long history of use, may give some indication of their safety and/or possible selectivity, which is a potential benefit of selecting traditionally used plants. Moreover, our machine learning approach does not yet provide any indication of which plant parts contain the active compounds, and which extraction methods optimise their concentrations; in contrast to traditional preparations that specify plant parts and methods for their preparation. Nevertheless, finding active antiplasmodial plants is still a critical step in the search for new antiplasmodial plant-derived compounds with potential lead structures/pharmacophores to facilitate future drug discovery for malaria. The urgent need to find new antimalarial drugs exists against a backdrop of escalating resistance to existing antimalarial drugs (<xref ref-type="bibr" rid="B79">Uwimana et&#xa0;al., 2020</xref>), and in the context that the WHO&#x2019;s Global Technical Strategy for Malaria (2016 &#x2013; 2030) aims to ensure universal access to malaria prevention, diagnosis and treatment, an aim that is supported through harnessing innovation and expanding research (<xref ref-type="bibr" rid="B84">WHO, 2017</xref>).</p>
<p>In summary, we show that trait data-based machine learning models can outperform existing ethnobotanical plant selection approaches to find species with antiplasmodial activity, and provide a novel approach underpinning future work to predict the bioactivity of plant species. Plants are a known source of lead compounds for pharmaceutical drug development (<xref ref-type="bibr" rid="B34">Howes et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B53">Newman and Cragg, 2020</xref>) and more strategic and efficient approaches are needed to facilitate future drug discovery, particularly considering that there are an estimated 343,000 known vascular plant species (<xref ref-type="bibr" rid="B28">Govaerts et&#xa0;al., 2021</xref>) that remain largely unexplored scientifically. This study highlights the potential of integrating ethnobotanical knowledge with technological advances. While such integration creates promising opportunities, we stress the need for any material and non-material benefits to be shared fairly and equitably with knowledge holders and stewards of plant diversity around the world (<xref ref-type="bibr" rid="B4">Antonelli, 2023</xref>). By exploring sustainable uses of biodiversity, societies are more likely to reach the ambitious goals and targets set under the recently established Kunming-Montreal Global Biodiversity Framework.</p>
<sec id="s4_1">
<label>4.1</label>
<title>Related work and novelty</title>
<p>In this paper we have presented and evaluated a novel approach based on plant traits to predict the antiplasmodial activity of plants. Though there is some related work, e.g. predicting antiplasmodial activity of compounds (<xref ref-type="bibr" rid="B24">Egieyeh et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B19">Danishuddin et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B8">Bosc et&#xa0;al., 2021</xref>), predicting potential antiplasmodial plants using traditional antimalarial <italic>usage</italic> as a proxy (<xref ref-type="bibr" rid="B57">Pellicer et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B48">Milliken et&#xa0;al., 2021</xref>), predicting other related measures of bioactivity (<xref ref-type="bibr" rid="B65">R&#xf8;nsted et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B44">Maldonado et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B32">Holzmeyer et&#xa0;al., 2020</xref>), we believe ours is the first to predict antiplasmodial activity of plants directly based on a combination of plant trait data.</p>
<p>In order to predict the antiplasmodial activity of plants, we have generated a comprehensive resource of plant traits and documented antiplasmodial activity for plants in the Apocynaceae, Loganiaceae and Rubiaceae families. With regard to antiplasmodial activity, the closest available datasets we were able to find detailing antiplasmodial plants were the metabolite and biological activity data from KNApSAcK (<xref ref-type="bibr" rid="B1">Afendi et&#xa0;al., 2012</xref>) and Dr. Duke&#x2019;s Phytochemical and Ethnobotanical Databases (DPED) (<xref ref-type="bibr" rid="B77">USDA, 2022a</xref>). In an attempt to utilise the KNApSAcK data, we extracted information on known antiplasmodial metabolites from KNApSAcK and, using the KNApSAcK database, were able to match these to plants which contain these compounds. Similarly, we downloaded the list of antiplasmodial plants in DPED and filtered the results to the study families. We found these data to be limited. Firstly, in both cases, the data are limited to antiplasmodial activity of specific compounds rather than antiplasmodial fractions or extracts from plants. Secondly, the coverage of the data is poor (from KNApSAcK: one active species in Apocynaceae, one in Loganiaceae and four in Rubiaceae; from DPED: 19 active species in Apocynaceae, one in Loganiaceae and ten in Rubiaceae). Also, though KNApSAcK and DPED provide references to the original research, it is not clear exactly what criteria are used to determine when a compound is an active antiplasmodial and in DPED many of the cases of &#x2018;active&#x2019; species were due to presence of compounds with weak activity (e.g. lupeol, rutin, quercetin and betulinic acid). Finally, from these kinds of data, it is difficult to ascertain with confidence which plants are inactive.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Future work</title>
<p>We have shown that the collected trait data can be used to predict antiplasmodial activity with machine learning approaches and basic preprocessing steps. However, though we have used a bias correction method to improve evaluation of the plant selection approaches, we recognise that the models must be tested on the underlying population in order to obtain a true measure of model performance. We hope to address this in future work by using the machine learning models to predict active species in the underlying population and assessing the activity of these predicted species in new antiplasmodial assays. Regarding training of the models, as we have seen, the antiplasmodial activity is known for only 282 species, resulting in a relatively small dataset for training machine learning models. We believe that small improvements in the existing data could further improve performance of the machine learning approaches, and, where possible, we therefore encourage further testing of species that are currently underrepresented in the existing data.</p>
</sec>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref>. All finalised trait data and analysis are archived in <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.7836732">https://doi.org/10.5281/zenodo.7836732</ext-link>. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>M-JRH, OP-E, EL, JR and AA conceptualized the study. AR-B, CB, DG, EL, M-JRH, CA, DR, IO and SP collated data and provided specialist input on datasets. DS provided specialist input on the machine learning methodology. CW provided specialist input on antiplasmodial activity. AR-B collated data, conducted analyses and drafted the original manuscript. All authors participated in writing and giving feedback on the manuscript. All authors have read and approved the final manuscript.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The authors would like to thank the individuals who have generously funded this project. DS received funding from the Swiss National Science Foundation (PCEFP3_187012) and the Swedish Research Council (VR: 2019-04739). DS and AA acknowledge funding from the Swedish Foundation for Strategic Environmental Research MISTRA within the framework of the research programme BIOPATH (F 2022/1448). AA further acknowledges financial support from the Swedish Research Council (2019-05191) and the Royal Botanic Gardens, Kew.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>The authors thank Dr. Bob Allkin and the MPNS team for use of necessary datasets from RBG Kew (<xref ref-type="bibr" rid="B51">MPNS, 2022</xref>) and Dr. Elizabeth Dauncey for useful discussions on poisonous plant data and access to the LitTox resource (<xref ref-type="bibr" rid="B66">Royal Botanic Gardens, Kew, 2021</xref>).</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2023.1173328/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2023.1173328/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.zip" id="SM1" mimetype="application/zip"/>
<supplementary-material xlink:href="DataSheet_2.pdf" id="SM2" mimetype="application/pdf"/>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.wikipedia.org/">https://www.wikipedia.org/</ext-link> accessed on 14 Apr. 2022.</p>
</fn>
<fn id="fn2">
<label>2</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.gbif.org/what-is-gbif">https://www.gbif.org/what-is-gbif</ext-link>
</p>
</fn>
<fn id="fn3">
<label>3</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.mmv.org/20th-call-proposals">https://www.mmv.org/20th-call-proposals</ext-link> accessed on 30 Aug. 2022.</p>
</fn>
<fn id="fn4">
<label>4</label>
<p>The constant <italic>P</italic> (<italic>s</italic> = 1) is omitted.</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Afendi</surname> <given-names>F. M.</given-names>
</name>
<name>
<surname>Okada</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Yamazaki</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hirai-Morita</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Nakamura</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Nakamura</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>KNApSAcK family databases: integrated metabolite&#x2013;plant species databases for multifaceted plant research</article-title>. <source>Plant Cell Physiol.</source> <volume>53</volume>, <fpage>e1</fpage>&#x2013;<lpage>e1</lpage>. doi: <pub-id pub-id-type="doi">10.1093/pcp/pcr165</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al-Musayeib</surname> <given-names>N. M.</given-names>
</name>
<name>
<surname>Mothana</surname> <given-names>R. A.</given-names>
</name>
<name>
<surname>Al-Massarani</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Matheeussen</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Cos</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Maes</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Study of the in vitro antiplasmodial, antileishmanial and antitrypanosomal activities of medicinal plants from Saudi Arabia</article-title>. <source>Molecules</source> <volume>17</volume>, <fpage>11379</fpage>&#x2013;<lpage>11390</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/molecules171011379</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Andrade-Neto</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Brand&#xe3;o</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Stehmann</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Oliveira</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Krettli</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Antimalarial activity of cinchona-like plants used to treat fever and malaria in Brazil</article-title>. <source>J. Ethnopharmacology</source> <volume>87</volume>, <fpage>253</fpage>&#x2013;<lpage>256</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0378-8741(03)00141-7</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Antonelli</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Indigenous knowledge is key to sustainable food systems</article-title>. <source>Nature</source> <volume>613</volume>, <fpage>239</fpage>&#x2013;<lpage>242</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/d41586-023-00021-4</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Beck</surname> <given-names>H. E.</given-names>
</name>
<name>
<surname>Zimmermann</surname> <given-names>N. E.</given-names>
</name>
<name>
<surname>McVicar</surname> <given-names>T. R.</given-names>
</name>
<name>
<surname>Vergopolan</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Berg</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wood</surname> <given-names>E. F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Present and future K&#xf6;ppen-Geiger climate classification maps at 1-km resolution</article-title>. <source>Sci. Data</source> <volume>5</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1038/sdata.2018.214</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bender</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Cortes-Ciriano</surname> <given-names>I.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Artificial intelligence in drug discovery: what is realistic, what are illusions? part 2: a discussion of chemical and biological data</article-title>. <source>Drug Discov. Today</source> <volume>26</volume>, <fpage>1040</fpage>&#x2013;<lpage>1052</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.drudis.2020.11.037</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bertani</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Bourdy</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Landau</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Robinson</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Esterre</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Deharo</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Evaluation of French Guiana traditional antimalarial remedies</article-title>. <source>J. Ethnopharmacology</source> <volume>98</volume>, <fpage>45</fpage>&#x2013;<lpage>54</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jep.2004.12.020</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bosc</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Felix</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Arcila</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Mendez</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Saunders</surname> <given-names>M. R.</given-names>
</name>
<name>
<surname>Green</surname> <given-names>D. V. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>MAIP: a web service for predicting blood-stage malaria inhibitors</article-title>. <source>J. Cheminformatics</source> <volume>13</volume>, <elocation-id>13</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13321-021-00487-2</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bourdy</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Oporto</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Gimenez</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Deharo</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>A search for natural bioactive compounds in Bolivia through a multidisciplinary approach</article-title>. <source>J. Ethnopharmacology</source> <volume>93</volume>, <fpage>269</fpage>&#x2013;<lpage>277</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jep.2004.03.045</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brand&#xe3;o</surname> <given-names>D. L.</given-names>
</name>
<name>
<surname>d.</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Martins</surname> <given-names>M. T.</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>A. O.</given-names>
</name>
<name>
<surname>Almeida</surname> <given-names>A. D.</given-names>
</name>
<name>
<surname>d.</surname> <given-names>R. C.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Anti-malarial activity and toxicity of Aspidosperma nitidum Benth: a plant used in traditional medicine in the Brazilian Amazon</article-title>. <source>Research Soc. Dev.</source> <volume>9</volume>, <elocation-id>e5059108817</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.33448/rsd-v9i10.8817</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brier</surname> <given-names>G. W.</given-names>
</name>
</person-group> (<year>1950</year>). <article-title>Verification of forecasts expressed in terms of probability</article-title>. <source>Monthly Weather Rev.</source> <volume>78</volume>, <fpage>1</fpage>&#x2013;<lpage>3</lpage>.</citation>
</ref>
<ref id="B12">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Brummitt</surname> <given-names>R. K.</given-names>
</name>
<name>
<surname>Pando</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Hollis</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Brummitt</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2001</year>). <source>World geographical scheme for recording plant distributions</source> Vol. <volume>951</volume> (<publisher-loc>Hunt Institute for Botanical Documentation, Carnegie Mellon University, Pittsburgh</publisher-loc>: <publisher-name>International working group on taxonomic databases for plant sciences (TDWG)</publisher-name>).</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chamberlain</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Barve</surname> <given-names>V.</given-names>
</name>
<name>
<surname>McGlinn</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Oldoni</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Desmet</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Geffert</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <source>Rgbif: interface to the global biodiversity information facility API</source>.</citation>
</ref>
<ref id="B14">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Guestrin</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>XGBoost: a scalable tree boosting system</article-title>,&#x201d; in <conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>785</fpage>&#x2013;<lpage>794</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cordell</surname> <given-names>G. A.</given-names>
</name>
<name>
<surname>Quinn-Beattie</surname> <given-names>M. L.</given-names>
</name>
<name>
<surname>Farnsworth</surname> <given-names>N. R.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>The potential of alkaloids in drug discovery</article-title>. <source>Phytotherapy Res.</source> <volume>15</volume>, <fpage>183</fpage>&#x2013;<lpage>205</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ptr.890</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Cortes</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Mohri</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Riley</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Rostamizadeh</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Sample selection bias correction theory</article-title>,&#x201d; in <source>Algorithmic learning theory</source>, vol. <volume>5254</volume>. (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer Berlin Heidelberg</publisher-name>), <fpage>38</fpage>&#x2013;<lpage>53</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-540-87987-9</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Daley</surname> <given-names>S.-k.</given-names>
</name>
<name>
<surname>Cordell</surname> <given-names>G. A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Alkaloids in contemporary drug discovery to meet global disease needs</article-title>. <source>Molecules</source> <volume>26</volume>, <elocation-id>3800</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/molecules26133800</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Danielson</surname> <given-names>J. J.</given-names>
</name>
<name>
<surname>Gesch</surname> <given-names>D. B.</given-names>
</name>
</person-group> (<year>2011</year>). <source>Global multi-resolution terrain elevation data 2010 (GMTED2010)</source>: U.S. Geological Survey Open-File Report 2011&#x2013;1073, <fpage>26</fpage> p.</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Danishuddin</surname>
</name>
<name>
<surname>Madhukar</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Malik</surname> <given-names>M. Z.</given-names>
</name>
<name>
<surname>Subbarao</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Development and rigorous validation of antimalarial predictive models using machine learning approaches</article-title>. <source>SAR QSAR Environ. Res.</source> <volume>30</volume>, <fpage>543</fpage>&#x2013;<lpage>560</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/1062936X.2019.1635526</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Almeida</surname> <given-names>C.</given-names>
</name>
<name>
<surname>de Lima e Silva</surname> <given-names>T.</given-names>
</name>
<name>
<surname>de Amorim</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Maia</surname> <given-names>M. d. S.</given-names>
</name>
<name>
<surname>de Albuquerque</surname> <given-names>U.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Life strategy and chemical composition as predictors of the selection of medicinal plants from the caatinga (Northeast Brazil)</article-title>. <source>J. Arid Environments</source> <volume>62</volume>, <fpage>127</fpage>&#x2013;<lpage>142</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jaridenv.2004.09.020</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Defossez</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Pitteloud</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Descombes</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Glauser</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Allard</surname> <given-names>P.-M.</given-names>
</name>
<name>
<surname>Walker</surname> <given-names>T. W. N.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Spatial and evolutionary predictability of phytochemical diversity</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>118</volume>, <elocation-id>e2013344118</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1073/pnas.2013344118</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dey</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Kundu</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kumar</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gupta</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>B. M.</given-names>
</name>
<name>
<surname>Bhakta</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). &#x201c;<article-title>Analysis of alkaloids (indole alkaloids, isoquinoline alkaloids, tropane alkaloids)</article-title>,&#x201d; in <source>Recent advances in natural products analysis</source> (<publisher-name>Elsevier</publisher-name>), <fpage>505</fpage>&#x2013;<lpage>567</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/B978-0-12-816455-6.00015-9</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>dos Santos Torres</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Silveira</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Rocha e Silva</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Lima</surname> <given-names>E.</given-names>
</name>
<name>
<surname>de Vasconcellos</surname> <given-names>M.</given-names>
</name>
<name>
<surname>de Andrade Uchoa</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2013</year>). <article-title>Chemical composition of <italic>Aspidosperma ulei</italic> Markgr. and antiplasmodial activity of selected indole alkaloids</article-title>. <source>Molecules</source> <volume>18</volume>, <fpage>6281</fpage>&#x2013;<lpage>6297</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/molecules18066281</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Egieyeh</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Syce</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Malan</surname> <given-names>S. F.</given-names>
</name>
<name>
<surname>Christoffels</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Predictive classifier models built from natural products with antimalarial bioactivity using machine learning approach</article-title>. <source>PLoS One</source> <volume>13</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0204644</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ezike</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Okonkwo</surname> <given-names>C. H.</given-names>
</name>
<name>
<surname>Akah</surname> <given-names>P. A.</given-names>
</name>
<name>
<surname>Okoye</surname> <given-names>T. C.</given-names>
</name>
<name>
<surname>Nworu</surname> <given-names>C. S.</given-names>
</name>
<name>
<surname>Mbaoji</surname> <given-names>F. N.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title><italic>Landolphia owariensis</italic> leaf extracts reduce parasitemia in <italic>Plasmodium berghei</italic>-infected mice</article-title>. <source>Pharm. Biol.</source> <volume>54</volume>, <fpage>2017</fpage>&#x2013;<lpage>2025</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3109/13880209.2016.1138970</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Federici</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Palazzino</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Nicoletti</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Galeffi</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Antiplasmodial activity of the alkaloids of <italic>Peschiera fuchsiaefolia</italic></article-title>. <source>Planta Med.</source> <volume>66</volume>, <fpage>93</fpage>&#x2013;<lpage>95</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1055/s-0029-1243122</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fr&#xe9;d&#xe9;rich</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jacquier</surname> <given-names>M.-J.</given-names>
</name>
<name>
<surname>Th&#xe9;penier</surname> <given-names>P.</given-names>
</name>
<name>
<surname>De Mol</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Tits</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Philippe</surname> <given-names>G.</given-names>
</name>
<etal/>
</person-group>. (<year>2002</year>). <article-title>Antiplasmodial activity of alkaloids from various <italic>Strychnos</italic> species</article-title>. <source>J. Natural Products</source> <volume>65</volume>, <fpage>1381</fpage>&#x2013;<lpage>1386</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/np020070e</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Govaerts</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Nic Lughadha</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Black</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Turner</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Paton</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The world checklist of vascular plants, a continuously updated resource for exploring global plant diversity</article-title>. <source>Sci. Data</source> <volume>8</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41597-021-00997-6</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hengl</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Mendes de Jesus</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Heuvelink</surname> <given-names>G. B.</given-names>
</name>
<name>
<surname>Ruiperez Gonzalez</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kilibarda</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Blagoti&#x107;</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>SoilGrids250m: global gridded soil information based on machine learning</article-title>. <source>PLoS One</source> <volume>12</volume>, <elocation-id>e0169748</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0169748</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hijmans</surname> <given-names>R. J.</given-names>
</name>
</person-group> (<year>2022</year>). <source>Terra: spatial data analysis</source>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Holm</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>1979</year>). <article-title>A simple sequentially rejective multiple test procedure</article-title>. <source>Scandinavian J. Stat.</source> <volume>6</volume>, <fpage>65</fpage>&#x2013;<lpage>70</lpage>.</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Holzmeyer</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Hartig</surname> <given-names>A.-K.</given-names>
</name>
<name>
<surname>Franke</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Brandt</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Muellner-Riehl</surname> <given-names>A. N.</given-names>
</name>
<name>
<surname>Wessjohann</surname> <given-names>L. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Evaluation of plant sources for antiinfective lead compound discovery by correlating phylogenetic, spatial, and bioactivity data</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>117</volume>, <fpage>12444</fpage>&#x2013;<lpage>12451</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1073/pnas.1915277117</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Houghton</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Howes</surname> <given-names>M.-J. R.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Steventon</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Uses and abuses of <italic>in vitro</italic> tests in ethnopharmacology: visualizing an elephant</article-title>. <source>J. Ethnopharmacology</source> <volume>110</volume>, <fpage>391</fpage>&#x2013;<lpage>400</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jep.2007.01.032</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Howes</surname> <given-names>M.-J. R.</given-names>
</name>
<name>
<surname>Quave</surname> <given-names>C. L.</given-names>
</name>
<name>
<surname>Collemare</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Tatsis</surname> <given-names>E. C.</given-names>
</name>
<name>
<surname>Twilley</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Lulekal</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Molecules from nature: reconciling biodiversity conservation and global healthcare imperatives for sustainable use of medicinal plants and fungi</article-title>. <source>Plants People Planet</source> <volume>2</volume>, <fpage>463</fpage>&#x2013;<lpage>481</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ppp3.10138</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>Jardim Bot&#xe2;nico do Rio de Janeiro</collab>
</person-group> (<year>2022</year>) <source>Flora do Brasil</source>. Available at: <uri xlink:href="http://floradobrasil.jbrj.gov.br/">http://floradobrasil.jbrj.gov.br/</uri>.</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kantamreddi</surname> <given-names>V. S. S.</given-names>
</name>
<name>
<surname>Wright</surname> <given-names>C. W.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Screening Indian plant species for antiplasmodial properties - ethnopharmacological compared with random selection</article-title>. <source>Phytotherapy Res.</source> <volume>26</volume>, <fpage>1793</fpage>&#x2013;<lpage>1799</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ptr.4651</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karger</surname> <given-names>D. N.</given-names>
</name>
<name>
<surname>Conrad</surname> <given-names>O.</given-names>
</name>
<name>
<surname>B&#xf6;hner</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Kawohl</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Kreft</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Soria-Auza</surname> <given-names>R. W.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Climatologies at high resolution for the earth&#x2019;s land surface areas</article-title>. <source>Sci. Data</source> <volume>4</volume>, <elocation-id>170122</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/sdata.2017.122</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Karger</surname> <given-names>D. N.</given-names>
</name>
<name>
<surname>Conrad</surname> <given-names>O.</given-names>
</name>
<name>
<surname>B&#xf6;hner</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Kawohl</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Kreft</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Soria-Auza</surname> <given-names>R. W.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <source>Climatologies at high resolution for the earth&#x2019;s land surface areas</source>. <publisher-name>EnviDat</publisher-name>. doi:&#xa0;<pub-id pub-id-type="doi">10.16904/envidat.228.v2.1</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kattge</surname> <given-names>J.</given-names>
</name>
<name>
<surname>B&#xf6;nisch</surname> <given-names>G.</given-names>
</name>
<name>
<surname>D&#xed;az</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lavorel</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Prentice</surname> <given-names>I. C.</given-names>
</name>
<name>
<surname>Leadley</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>TRY plant trait database&#x2013;enhanced coverage and open access</article-title>. <source>Global Change Biol.</source> <volume>26</volume>, <fpage>119</fpage>&#x2013;<lpage>188</lpage>. doi: <pub-id pub-id-type="doi">10.1111/gcb.14904</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kaushik</surname> <given-names>N. K.</given-names>
</name>
<name>
<surname>Bagavan</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Rahuman</surname> <given-names>A. A.</given-names>
</name>
<name>
<surname>Mohanakrishnan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Kamaraj</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Elango</surname> <given-names>G.</given-names>
</name>
<etal/>
</person-group>. (<year>2013</year>). <article-title>Antiplasmodial potential of selected medicinal plants from Eastern Ghats of South India</article-title>. <source>Exp. Parasitol.</source> <volume>134</volume>, <fpage>26</fpage>&#x2013;<lpage>32</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.exppara.2013.01.021</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krettli</surname> <given-names>A. U.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Antimalarial drug discovery: screening of Brazilian medicinal plants and purified compounds</article-title>. <source>Expert Opin. Drug Discovery</source> <volume>4</volume>, <fpage>95</fpage>&#x2013;<lpage>108</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1517/17530050802678127</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krettli</surname> <given-names>A. U.</given-names>
</name>
<name>
<surname>Andrade-Neto</surname> <given-names>V. F.</given-names>
</name>
<name>
<surname>Brand&#xe3;o</surname> <given-names>M. d. G. L.</given-names>
</name>
<name>
<surname>Ferrari</surname> <given-names>W. M.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>The search for new antimalarial drugs from plants used to treat fever and malaria or plants ramdomly selected: a review</article-title>. <source>Mem&#xf3;rias do Instituto Oswaldo Cruz</source> <volume>96</volume>, <fpage>1033</fpage>&#x2013;<lpage>1042</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1590/S0074-02762001000800002</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Likhitwitayawuid</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Dej-adisai</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Jongbunprasert</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Krungkrai</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Antimalarials from <italic>Stephania venosa</italic>, <italic>Prismatomeris sessiliflora</italic>, <italic>Diospyros montana</italic> and <italic>Murraya siamensis</italic></article-title>. <source>Planta Med.</source> <volume>65</volume>, <fpage>754</fpage>&#x2013;<lpage>756</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1055/s-2006-960858</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maldonado</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Barnes</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Cornett</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Holmfred</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Hansen</surname> <given-names>S. H.</given-names>
</name>
<name>
<surname>Persson</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Phylogeny predicts the quantity of antimalarial alkaloids within the iconic yellow cinchona bark (Rubiaceae: <italic>Cinchona calisaya</italic>)</article-title>. <source>Front. Plant Sci.</source> <volume>8</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2017.00391</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Meshnick</surname> <given-names>S. R.</given-names>
</name>
<name>
<surname>Dobson</surname> <given-names>M. J.</given-names>
</name>
</person-group> (<year>2001</year>). &#x201c;<article-title>The history of antimalarial drugs</article-title>,&#x201d; in <source>Antimalarial chemotherapy</source> (<publisher-loc>New Jersey</publisher-loc>: <publisher-name>Humana Press</publisher-name>), <fpage>15</fpage>&#x2013;<lpage>25</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1385/1-59259-111-6:15</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meyer</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Weigelt</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Kreft</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Multidimensional biases, gaps and uncertainties in global plant occurrence information</article-title>. <source>Ecol. Lett.</source> <volume>19</volume>, <fpage>992</fpage>&#x2013;<lpage>1006</lpage>. doi: <pub-id pub-id-type="doi">10.1111/ele.12624</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Micci-Barreca</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems</article-title>. <source>ACM SIGKDD Explor. Newslett.</source> <volume>3</volume>, <fpage>27</fpage>&#x2013;<lpage>32</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/507533.507538</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Milliken</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Walker</surname> <given-names>B. E.</given-names>
</name>
<name>
<surname>Howes</surname> <given-names>M.-J. R.</given-names>
</name>
<name>
<surname>Forest</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Nic Lughadha</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Plants used traditionally as antimalarials in Latin America: mining the tree of life for potential new medicines</article-title>. <source>J. Ethnopharmacology</source> <volume>279</volume>, <elocation-id>114221</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jep.2021.114221</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mitaine-Offer</surname> <given-names>A.-C.</given-names>
</name>
<name>
<surname>Sauvain</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Valentin</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Callapa</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Malli&#xe9;</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Z&#xe8;ches-Hanrot</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Antiplasmodial activity of <italic>Aspidosperma</italic> indole alkaloids</article-title>. <source>Phytomedicine</source> <volume>9</volume>, <fpage>142</fpage>&#x2013;<lpage>145</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1078/0944-7113-00094</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mothana</surname> <given-names>R. A.</given-names>
</name>
<name>
<surname>Al-Musayeib</surname> <given-names>N. M.</given-names>
</name>
<name>
<surname>Al-Ajmi</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>Cos</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Maes</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Evaluation of the <italic>in vitro</italic> antiplasmodial, antileishmanial, and antitrypanosomal activity of medicinal plants used in Saudi and Yemeni traditional medicine</article-title>. <source>Evidence-Based Complementary Altern. Med.</source> <volume>2014</volume>, <fpage>1</fpage>&#x2013;<lpage>7</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2014/905639</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>MPNS</collab>
</person-group> (<year>2022</year>) <source>Medicinal plant names services, version 11</source> (<publisher-name>Royal Botanic Gardens, Kew</publisher-name>) (Accessed <access-date>18/01/2022</access-date>).</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Muhammad</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Dunbar</surname> <given-names>D. C.</given-names>
</name>
<name>
<surname>Khan</surname> <given-names>S. I.</given-names>
</name>
<name>
<surname>Tekwani</surname> <given-names>B. L.</given-names>
</name>
<name>
<surname>Bedir</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Takamatsu</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2003</year>). <article-title>Antiparasitic alkaloids from <italic>Psychotria klugii</italic></article-title>. <source>J. Natural Products</source> <volume>66</volume>, <fpage>962</fpage>&#x2013;<lpage>967</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/np030086k</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Newman</surname> <given-names>D. J.</given-names>
</name>
<name>
<surname>Cragg</surname> <given-names>G. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Natural products as sources of new drugs over the nearly four decades from 01/1981 to 09/2019</article-title>. <source>J. Natural Products</source> <volume>83</volume>, <fpage>770</fpage>&#x2013;<lpage>803</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.jnatprod.9b01285</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ocan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Akena</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Nsobya</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kamya</surname> <given-names>M. R.</given-names>
</name>
<name>
<surname>Senono</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Kinengyere</surname> <given-names>A. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Persistence of chloroquine resistance alleles in malaria endemic countries: a systematic review of burden and risk factors</article-title>. <source>Malaria J.</source> <volume>18</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12936-019-2716-z</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pearson</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>1900</year>). <article-title>On the criterion that a given system of deviations from the probable in the case of a correlated system of variables is such that it can be reasonably supposed to have arisen from random sampling</article-title>. <source>London Edinburgh Dublin Philos. Magazine J. Sci.</source> <volume>50</volume>, <fpage>157</fpage>&#x2013;<lpage>175</lpage>. doi: <pub-id pub-id-type="doi">10.1080/14786440009463897</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedregosa</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Varoquaux</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Gramfort</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Michel</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Thirion</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Grisel</surname> <given-names>O.</given-names>
</name>
<etal/>
</person-group>. (<year>2011</year>). <article-title>Scikit-learn: machine learning in Python</article-title>. <source>J. Mach. Learn. Res.</source> <volume>12</volume>, <fpage>2825</fpage>&#x2013;<lpage>2830</lpage>.</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pellicer</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Saslis-Lagoudakis</surname> <given-names>C. H.</given-names>
</name>
<name>
<surname>Carri&#xf3;</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Ernst</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Garnatje</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Grace</surname> <given-names>O. M.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>A phylogenetic road map to antimalarial <italic>Artemisia</italic> species</article-title>. <source>J. Ethnopharmacology</source> <volume>225</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jep.2018.06.030</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Philippe</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Angenot</surname> <given-names>L.</given-names>
</name>
<name>
<surname>De Mol</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Goffin</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Hayette</surname> <given-names>M.-P.</given-names>
</name>
<name>
<surname>Tits</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2005</year>). <article-title><italic>In vitro</italic> screening of some <italic>Strychnos</italic> species for antiplasmodial activity</article-title>. <source>J. Ethnopharmacology</source> <volume>97</volume>, <fpage>535</fpage>&#x2013;<lpage>539</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jep.2004.12.011</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Poggio</surname> <given-names>L.</given-names>
</name>
<name>
<surname>De Sousa</surname> <given-names>L. M.</given-names>
</name>
<name>
<surname>Batjes</surname> <given-names>N. H.</given-names>
</name>
<name>
<surname>Heuvelink</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Kempen</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ribeiro</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>SoilGrids 2.0: producing soil information for the globe with quantified spatial uncertainty</article-title>. <source>Soil</source> <volume>7</volume>, <fpage>217</fpage>&#x2013;<lpage>240</lpage>. doi: <pub-id pub-id-type="doi">10.5194/soil-7-217-2021</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>POWO</collab>
</person-group> (<year>2022</year>). <source>Plants of the world online</source> (<publisher-name>Royal Botanic Gardens, Kew</publisher-name>). Available at: <uri xlink:href="http://www.plantsoftheworldonline.org/">http://www.plantsoftheworldonline.org/</uri>.</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<collab>Qinghaosu Antimalaria Coordinating Research Group</collab>
</person-group> (<year>1979</year>). <article-title>Antimalaria studies on qinghaosu</article-title>. <source>Chin. Med. J.</source> <volume>92</volume>, <fpage>811</fpage>&#x2013;<lpage>816</lpage>.</citation>
</ref>
<ref id="B62">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ramalhete</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Lopes</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Mulhovo</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ros&#xe1;rio</surname> <given-names>V. E.</given-names>
</name>
<name>
<surname>Ferreira</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Antimalarial activity of some plants traditionally used in Mozambique</article-title>,&#x201d; in <source>Workshop plantas medicinais e fitoterap&#xea;uticas nos tr&#xf3;picos</source>, vol. <volume>29</volume>. (<publisher-name>IICT/CCCM</publisher-name>), <fpage>30</fpage>.</citation>
</ref>
<ref id="B63">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rasoanaivo</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Deharo</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Ratsimamanga-Urverg</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Frappier</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2004</year>a). &#x201c;<article-title>Guidelines for the nonclinical evaluation of the efficacy of traditional antimalarials</article-title>,&#x201d; in <source>Traditional medicinal plants and malaria</source> (<publisher-loc>Boca Raton</publisher-loc>: <publisher-name>CRC Press</publisher-name>), <fpage>324</fpage>&#x2013;<lpage>341</lpage>.</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rasoanaivo</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Ramanitrahasimbola</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Rafatro</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Rakotondramanana</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Robijaona</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Rakotozafy</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2004</year>b). <article-title>Screening extracts of Madagascan plants in search of antiplasmodial compounds: screening extracts of Madagascan plants for antiplasmodial compounds</article-title>. <source>Phytotherapy Res.</source> <volume>18</volume>, <fpage>742</fpage>&#x2013;<lpage>747</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ptr.1533</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>R&#xf8;nsted</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Symonds</surname> <given-names>M. R. E.</given-names>
</name>
<name>
<surname>Birkholm</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Christensen</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Meerow</surname> <given-names>A. W.</given-names>
</name>
<name>
<surname>Molander</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Can phylogeny predict chemical diversity and potential medicinal activity of plants? A case study of Amaryllidaceae</article-title>. <source>BMC Evolutionary Biol.</source> <volume>12</volume>, <elocation-id>182</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1471-2148-12-182</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>Royal Botanic Gardens, Kew</collab>
</person-group> (<year>2021</year>). <source>LitTox database</source> (<publisher-loc>London</publisher-loc>: <publisher-name>Royal Botanic Gardens, Kew</publisher-name>).</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Satish</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Kumari</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Sunita</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Antiplasmodial efficacy of Calotropis gigantea (L.) against Plasmodium falciparum (3D7 strain) and Plasmodium berghei (ANKA)</article-title>. <source>J. Vector Borne Dis.</source> <volume>54</volume>, <fpage>215</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.4103/0972-9062.217612</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Silvestro</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Andermann</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Prior choice affects ability of Bayesian neural networks to identify unknowns</article-title>. <source>ArXiv</source>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2005.04987</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Singh</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Kaushik</surname> <given-names>N. K.</given-names>
</name>
<name>
<surname>Mohanakrishnan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Tiwari</surname> <given-names>S. K.</given-names>
</name>
<name>
<surname>Sahal</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Antiplasmodial activity of medicinal plants from Chhotanagpur plateau, Jharkhand, India</article-title>. <source>J. Ethnopharmacology</source> <volume>165</volume>, <fpage>152</fpage>&#x2013;<lpage>162</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jep.2015.02.038</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smirnov</surname> <given-names>N. V.</given-names>
</name>
</person-group> (<year>1939</year>). <article-title>Estimate of deviation between empirical distribution functions in two independent samples</article-title>. <source>Bull. Moscow Univ.</source> <volume>2</volume>, <fpage>3</fpage>&#x2013;<lpage>16</lpage>.</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Solis</surname> <given-names>P. N.</given-names>
</name>
<name>
<surname>Lang&#x2019;at</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Gupta</surname> <given-names>M. P.</given-names>
</name>
<name>
<surname>Kirby</surname> <given-names>G. C.</given-names>
</name>
<name>
<surname>Warhurst</surname> <given-names>D. C.</given-names>
</name>
<name>
<surname>Phillipson</surname> <given-names>J. D.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Bio-active compounds from Psychotria camponutans</article-title>. <source>Planta Med.</source> <volume>61</volume>, <fpage>62</fpage>&#x2013;<lpage>65</lpage>. doi: <pub-id pub-id-type="doi">10.1055/s-2006-958001</pub-id>
</citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soto-Sobenis</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Castillo</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Delgado</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gonz&#xe1;lez</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Montenegro</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Alkaloid screening of herbarium samples of Rubiaceae from Panama</article-title>. <source>Pharm. Biol.</source> <volume>39</volume>, <fpage>161</fpage>&#x2013;<lpage>169</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1076/phbi.39.3.161.5925</pub-id>
</citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Suksamrarn</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Tanachatchairatana</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Kanokmedhakul</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Antiplasmodial triterpenes from twigs of Gardenia saxatilis</article-title>. <source>J. Ethnopharmacology</source> <volume>88</volume>, <fpage>275</fpage>&#x2013;<lpage>277</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0378-8741(03)00261-7</pub-id>
</citation>
</ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taek</surname> <given-names>M. M.</given-names>
</name>
<name>
<surname>Tukan</surname> <given-names>G. D.</given-names>
</name>
<name>
<surname>Prajogo</surname> <given-names>B. E. W.</given-names>
</name>
<name>
<surname>Agil</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Antiplasmodial activity and phytochemical constituents of selected antimalarial plants used by native people in West Timor Indonesia</article-title>. <source>Turkish J. Pharm. Sci.</source> <volume>18</volume>, <fpage>80</fpage>&#x2013;<lpage>90</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.4274/tjps.galenos.2019.29000</pub-id>
</citation>
</ref>
<ref id="B75">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>The World Bank</collab>
</person-group> (<year>2022</year>) <source>World development indicators</source>. Available at: <uri xlink:href="https://datacatalog.worldbank.org/search/dataset/0037712">https://datacatalog.worldbank.org/search/dataset/0037712</uri> (Accessed <access-date>2022-05-03</access-date>).</citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tomlinson</surname> <given-names>M. L.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Barclay</surname> <given-names>E. J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Felix</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Diterpenoids from Scutellaria barbata induce tumour-selective cytotoxicity by taking the brakes off apoptosis</article-title>. <source>Medicinal Plant Biol.</source> <volume>1</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. doi: <pub-id pub-id-type="doi">10.48130/MPB-2022-0003</pub-id>
</citation>
</ref>
<ref id="B77">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>USDA</collab>
</person-group> (<year>2022</year>a) <source>Dr. Duke&#x2019;s phytochemical and ethnobotanical databases</source> (Accessed <access-date>2022-02-22</access-date>).</citation>
</ref>
<ref id="B78">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>USDA</collab>
</person-group> (<year>2022</year>b) <source>The PLANTS database</source>. Available at: <uri xlink:href="http://plants.usda.gov">http://plants.usda.gov</uri> (Accessed <access-date>2022-01-10</access-date>).</citation>
</ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Uwimana</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Legrand</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Stokes</surname> <given-names>B. H.</given-names>
</name>
<name>
<surname>Ndikumana</surname> <given-names>J.-L. M.</given-names>
</name>
<name>
<surname>Warsame</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Umulisa</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Emergence and clonal expansion of <italic>in vitro</italic> artemisinin-resistant Plasmodium falciparum kelch13 R561H mutant parasites in Rwanda</article-title>. <source>Nat. Med.</source> <volume>26</volume>, <fpage>1602</fpage>&#x2013;<lpage>1608</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41591-020-1005-2</pub-id>
</citation>
</ref>
<ref id="B80">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Visscher</surname> <given-names>A. M.</given-names>
</name>
<name>
<surname>Vandelook</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez-Pascual</surname> <given-names>E.</given-names>
</name>
<name>
<surname>P&#xe9;rez-Mart&#xed;nez</surname> <given-names>L. V.</given-names>
</name>
<name>
<surname>Ulian</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Diazgranados</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Low availability of functional seed trait data from the tropics could negatively affect global macroecological studies, predictive models and plant conservation</article-title>. <source>Ann. Bot.</source> <volume>130</volume> (<issue>6</issue>), <fpage>773</fpage>&#x2013;<lpage>784</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/aob/mcac130</pub-id>
</citation>
</ref>
<ref id="B81">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weenen</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Nkunya</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Bray</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Mwasumbi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Kinabo</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Kilimali</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>1990</year>). <article-title>Antimalarial activity of Tanzanian medicinal plants</article-title>. <source>Planta Med.</source> <volume>56</volume>, <fpage>368</fpage>&#x2013;<lpage>370</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1055/s-2006-960984</pub-id>
</citation>
</ref>
<ref id="B82">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weniger</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Robledo</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Arango</surname> <given-names>G. J.</given-names>
</name>
<name>
<surname>Deharo</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Arag&#xf3;n</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Mu&#xf1;oz</surname> <given-names>V.</given-names>
</name>
<etal/>
</person-group>. (<year>2001</year>). <article-title>Antiprotozoal activities of Colombian plants</article-title>. <source>J. Ethnopharmacology</source> <volume>78</volume>, <fpage>193</fpage>&#x2013;<lpage>200</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0378-8741(01)00346-4</pub-id>
</citation>
</ref>
<ref id="B83">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>WHO</collab>
</person-group> (<year>2008</year>). <source>World malaria report 2008</source> (<publisher-loc>Geneva</publisher-loc>: <publisher-name>World Health Organization</publisher-name>).</citation>
</ref>
<ref id="B84">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>WHO</collab>
</person-group> (<year>2017</year>). <source>A framework for malaria elimination</source> (<publisher-loc>Geneva</publisher-loc>: <publisher-name>World Health Organization</publisher-name>).</citation>
</ref>
<ref id="B85">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>WHO</collab>
</person-group> (<year>2021</year>). <source>World Health Organization model list of essential medicines: 22nd list. tech. rep</source> (<publisher-loc>Geneva</publisher-loc>: <publisher-name>World Health Organization</publisher-name>).</citation>
</ref>
<ref id="B86">
<citation citation-type="web">
<person-group person-group-type="author">
<collab>WHO</collab>
</person-group> (<year>2022</year>a) <source>Global health observatory: number of indigenous malaria cases</source> (<publisher-name>World Health Organization</publisher-name>). Available at: <uri xlink:href="https://www.who.int/data/gho/data/indicators/indicator-details/GHO/number-of-indigenous-malaria-cases">https://www.who.int/data/gho/data/indicators/indicator-details/GHO/number-of-indigenous-malaria-cases</uri> (Accessed <access-date>2022-09-14</access-date>).</citation>
</ref>
<ref id="B87">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>WHO</collab>
</person-group> (<year>2022</year>b). <source>World malaria report 2022</source> (<publisher-loc>Geneva</publisher-loc>: <publisher-name>World Health Organization</publisher-name>).</citation>
</ref>
<ref id="B88">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wong</surname> <given-names>S. K.</given-names>
</name>
<name>
<surname>Lim</surname> <given-names>Y. Y.</given-names>
</name>
<name>
<surname>Abdullah</surname> <given-names>N. R.</given-names>
</name>
<name>
<surname>Nordin</surname> <given-names>F. J.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Assessment of antiproliferative and antiplasmodial activities of five selected Apocynaceae species</article-title>. <source>BMC Complementary Altern. Med.</source> <volume>11</volume>, <elocation-id>3</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1472-6882-11-3</pub-id>
</citation>
</ref>
<ref id="B89">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wright</surname> <given-names>C. W.</given-names>
</name>
<name>
<surname>Allen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Phillipson</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Said</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Kirby</surname> <given-names>G.</given-names>
</name>
<etal/>
</person-group>. (<year>1992</year>). <article-title>
<italic>In vitro</italic> antiamoebic and antiplasmodial activities of alkaloids isolated from Alstonia angustifolia roots</article-title>. <source>Phytotherapy Res.</source> <volume>6</volume>, <fpage>121</fpage>&#x2013;<lpage>124</lpage>. doi: <pub-id pub-id-type="doi">10.1002/ptr.2650060303</pub-id>
</citation>
</ref>
<ref id="B90">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zadrozny</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2004</year>). &#x201c;<article-title>Learning and evaluating classifiers under sample selection bias</article-title>,&#x201d; in <source>Twenty-first international conference on machine learning - ICML &#x2019;04</source> (<publisher-loc>Banff, Alberta, Canada</publisher-loc>: <publisher-name>ACM Press</publisher-name>), <fpage>114</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/1015330.1015425</pub-id>
</citation>
</ref>
<ref id="B91">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zizka</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Silvestro</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Andermann</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Azevedo</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Duarte Ritter</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Edler</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>CoordinateCleaner: standardized cleaning of occurrence records from biological collection databases</article-title>. <source>Methods Ecol. Evol.</source> <volume>10</volume>, <fpage>744</fpage>&#x2013;<lpage>751</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/2041-210X.13152</pub-id>
</citation>
</ref>
<ref id="B92">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zu</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Koch</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Schwery</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Pironon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Phillips</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ondo</surname> <given-names>I.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Pollen sterols are associated with phylogeny and environment but not with pollinator guilds</article-title>. <source>New Phytol.</source> <volume>230</volume>, <fpage>1169</fpage>&#x2013;<lpage>1184</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/nph.17227</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>