<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="brief-report" dtd-version="2.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Ecol. Evol.</journal-id>
<journal-title>Frontiers in Ecology and Evolution</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Ecol. Evol.</abbrev-journal-title>
<issn pub-type="epub">2296-701X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fevo.2023.1270857</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Ecology and Evolution</subject>
<subj-group>
<subject>Brief Research Report</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Surveying wildlife and livestock in Uganda with aerial cameras: Deep Learning reduces the workload of human interpretation by over 70%</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Delplanque</surname>
<given-names>Alexandre</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2381395"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Lamprey</surname>
<given-names>Richard</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2517883"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Foucher</surname>
<given-names>Samuel</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/634255"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Th&#xe9;au</surname>
<given-names>J&#xe9;r&#xf4;me</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/717208"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lejeune</surname>
<given-names>Philippe</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2571755/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>TERRA Teaching and Research Centre &#x2013; Forest Is Life, Gembloux Agro-Bio Tech, University of Li&#xe8;ge (ULi&#xe8;ge)</institution>, <addr-line>Gembloux</addr-line>, <country>Belgium</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Natural Resources, Faculty of Geo-Information Science and Earth Observation (ITC), University of Twente</institution>, <addr-line>Enschede</addr-line>, <country>Netherlands</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Applied Geomatics, Universit&#xe9; de Sherbrooke</institution>, <addr-line>Sherbrooke, QC</addr-line>, <country>Canada</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Quebec Centre for Biodiversity Science (QCBS), Stewart Biology, McGill University</institution>, <addr-line>Montr&#xe9;al, QC</addr-line>, <country>Canada</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Erhan Sener, S&#xfc;leyman Demirel University, T&#xfc;rkiye</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Peter John Sabine Fleming, NSW Government, Australia; Benjamin Kellenberger, Yale University, United States</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Alexandre Delplanque, <email xlink:href="mailto:alexandre.delplanque@uliege.be">alexandre.delplanque@uliege.be</email>; Richard Lamprey, <email xlink:href="mailto:lamprey.richard@gmail.com">lamprey.richard@gmail.com</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>17</day>
<month>11</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>11</volume>
<elocation-id>1270857</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>08</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>10</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Delplanque, Lamprey, Foucher, Th&#xe9;au and Lejeune</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Delplanque, Lamprey, Foucher, Th&#xe9;au and Lejeune</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>As the need to accurately monitor key-species populations grows amid increasing pressures on global biodiversity, the counting of large mammals in savannas has traditionally relied on the Systematic-Reconnaissance-Flight (SRF) technique using light aircrafts and human observers. However, this method has limitations, including non-systematic human errors. In recent years, the Oblique-Camera-Count (OCC) approach developed in East Africa has utilized cameras to capture high-resolution imagery replicating aircraft observers&#x2019; oblique view. Whilst demonstrating that human observers have missed many animals, OCC relies on labor-intensive human interpretation of thousands of images. This study explores the potential of Deep Learning (DL) to reduce the interpretation workload associated with OCC surveys. Using oblique aerial imagery of 2.1 hectares footprint collected during an SRF-OCC survey of Queen Elizabeth Protected Area in Uganda, a DL model (HerdNet) was trained and evaluated to detect and count 12 wildlife and livestock mammal species. The model&#x2019;s performance was assessed both at the animal instance-based and image-based levels, achieving accurate detection performance (F1 score of 85%) in positive images (i.e. containing animals) and reducing manual interpretation workload by 74% on a realistic dataset showing less than 10% of positive images. However, it struggled to differentiate visually related species and overestimated animal counts due to false positives generated by landscape items resembling animals. These challenges may be addressed through improved training and verification processes. The results highlight DL&#x2019;s potential to semi-automate processing of aerial survey wildlife imagery, reducing manual interpretation burden. By incorporating DL models into existing counting standards, future surveys may increase sampling efforts, improve accuracy, and enhance aerial survey safety.</p>
</abstract>
<kwd-group>
<kwd>wildlife</kwd>
<kwd>aerial survey</kwd>
<kwd>Deep Learning</kwd>
<kwd>remote sensing</kwd>
<kwd>convolutional neural networks</kwd>
<kwd>animal conservation</kwd>
<kwd>livestock</kwd>
<kwd>object detection</kwd>
</kwd-group>
<counts>
<fig-count count="2"/>
<table-count count="2"/>
<equation-count count="3"/>
<ref-count count="45"/>
<page-count count="9"/>
<word-count count="4741"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Environmental Informatics and Remote Sensing</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>As pressures on biodiversity increase across the globe, accurately determining key-species populations is seen as critical in the &#x2018;Essential Biodiversity Variables&#x2019; (EBV) approach to monitoring ecosystem health (<xref ref-type="bibr" rid="B2">Brummitt et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B20">Jetz et&#xa0;al., 2019</xref>). For over 60 years, the counting of large wildlife species in the expansive savannas of eastern and southern Africa has been addressed using light aircrafts and human counting crews (<xref ref-type="bibr" rid="B17">Gwynne and Croze, 1975</xref>; <xref ref-type="bibr" rid="B30">Norton-Griffiths, 1978</xref>; <xref ref-type="bibr" rid="B18">Jachmann, 2001</xref>). The &#x2018;Systematic Reconnaissance Flight&#x2019; (SRF) technique involves flying an aircraft at low altitude along transects, whilst Rear-Seat-Observers (RSOs) count animals to left and right in strips of terrain defined by markers on the aircraft (<xref ref-type="bibr" rid="B5">Caughley, 1977</xref>; <xref ref-type="bibr" rid="B30">Norton-Griffiths, 1978</xref>; <xref ref-type="bibr" rid="B16">Grimsdell and Westley, 1981</xref>; <xref ref-type="bibr" rid="B40">Stelfox and Peden, 1981</xref>). The transects are the sample units, and analysis to derive estimates and margins of error is conducted using the Jolly II Method (<xref ref-type="bibr" rid="B21">Jolly, 1969</xref>; <xref ref-type="bibr" rid="B5">Caughley, 1977</xref>).</p>
<p>SRF &#x2018;counting standards&#x2019; have been adopted by many eastern and southern African countries to ensure that data meet minimum quality requirements for national and continental-wide trend-analysis of critical flagship species such as elephants (<xref ref-type="bibr" rid="B30">Norton-Griffiths, 1978</xref>; <xref ref-type="bibr" rid="B8">Craig, 2012</xref>; <xref ref-type="bibr" rid="B34">PAEAS, 2014</xref>; <xref ref-type="bibr" rid="B7">CITES-MIKE, 2019</xref>). These standards define <italic>inter alia</italic> the flying heights and strip-widths for counting, the sampling intensities that should be used, the length of time that RSOs should count animals before rest-breaks, the recording methods and the statistical analysis techniques used. Although these standards can ensure that important technical criteria are met, they cannot account for all human counting bias. Observers may miss cryptic animals, become overstretched when faced with large herds or multi-species groups, and lose concentration in long hot, turbulent flights over monotonous landscapes (<xref ref-type="bibr" rid="B4">Caughley, 1974</xref>; <xref ref-type="bibr" rid="B19">Jachmann, 2002</xref>; <xref ref-type="bibr" rid="B15">Fleming and Tracey, 2008</xref>; <xref ref-type="bibr" rid="B38">Schlossberg et&#xa0;al., 2016</xref>). In regard to detection, they have very little time to search and record animals; as the aircraft moves at a ground-speed of 170&#x2013;180 km.hr<sup>&#x2212;1</sup> along the transect, the RSO can hold any particular feature in view for 5&#x2013;7 seconds (<xref ref-type="bibr" rid="B15">Fleming and Tracey, 2008</xref>). 
For this reason, an optimum RSO strip width of 150 m on each side of the aircraft was derived from experimental studies in the 1970s, and this metric was subsequently embedded within counting standards (<xref ref-type="bibr" rid="B36">Pennycuick and Western, 1969</xref>; <xref ref-type="bibr" rid="B6">Caughley and Goddard, 1975</xref>; <xref ref-type="bibr" rid="B30">Norton-Griffiths, 1978</xref>; <xref ref-type="bibr" rid="B40">Stelfox and Peden, 1981</xref>; <xref ref-type="bibr" rid="B33">Ottichilo and Khaemba, 2001</xref>).</p>
<p>Despite the long-recognized constraints of RSO-viewing, consistency of method over decades is seen as key in determining trends (<xref ref-type="bibr" rid="B31">Ogutu et&#xa0;al., 2016</xref>). Therefore, advances in methods will need to be made incrementally to ensure harmonization with previous surveys. A recent SRF advance in East Africa, known as the &#x2018;Oblique-Camera-Count&#x2019; (OCC), uses digital cameras to record the counting strips to left and right of the aircraft (<xref ref-type="bibr" rid="B26">Lamprey et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B25">Lamprey et&#xa0;al., 2020</xref>). This replicates the oblique view of the RSOs where animals can be detected under tree canopies. With OCC the observers are not in the aircraft but in the laboratory, and their job is to interpret the many thousands of images obtained in a flight mission.</p>    <p>In recent years, multiple RSO-OCC comparisons have been conducted. <xref ref-type="bibr" rid="B1">Br&#xf6;ker et&#xa0;al. (2019)</xref> showed that the abundance estimate of narwhal in Greenland (<italic>Monodon monoceros</italic>) based on oblique-imaging was not significantly different from RSO one. However, <xref ref-type="bibr" rid="B28">Lethbridge et&#xa0;al. (2019)</xref> found 30% higher oblique-imaging estimates than RSO ones when surveying Kangaroos in Australia. OCC counts in Kenya and Uganda over the last decade revealed that RSOs had been missing up to 70% of large mammal species, including key cryptic species such as giraffe (<xref ref-type="bibr" rid="B26">Lamprey et&#xa0;al., 2019</xref>). Estimates for smaller animals were greatly increased. In Murchison Falls National Park in Uganda for example, an RSO-based survey estimated 600 oribi (<italic>Ourebia ourebi</italic> ssp. <italic>cottoni</italic>), whilst an OCC survey the following year estimated 12,000 (<xref ref-type="bibr" rid="B25">Lamprey et&#xa0;al., 2020</xref>). 
Thus the use of cameras is important in resetting baseline population estimates.</p>
<p>The primary advantage of camera-based counts is that time can be spent in the lab to carefully study each image for animals, and that interpreters can cross-check scenes for verification. Conversely, the primary constraint of aerial imaging methods is that thousands of images are acquired that need to be visually interpreted. This is a time-consuming and costly exercise. For example, a standard counting flight transect, involving just 30 minutes of RSO time for detection and recording, would obtain 900 OCC images taken each side of the aircraft. These images will take 4 days to interpret by two interpreters (left and right cameras). It is therefore not surprising that conservation agencies balk at the time and labour costs of OCC counts and other imaging exercises (<xref ref-type="bibr" rid="B1">Br&#xf6;ker et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B35">Peng et&#xa0;al., 2020</xref>).</p>
<p>Another limitation of the OCC approach is that a very high percentage of aerial images will have no animals. In the arid Tsavo NP in Kenya for example, just 2% of the 160,000 images acquired had animals present (<xref ref-type="bibr" rid="B26">Lamprey et&#xa0;al., 2019</xref>). In Uganda&#x2019;s sub-humid national parks with higher density of wildlife, some 10% of images are positive (<xref ref-type="bibr" rid="B25">Lamprey et&#xa0;al., 2020</xref>). In general, therefore, over 90% of the time of OCC image interpretation is spent on True Negative (TN) images &#x2013; images with no animals &#x2013; and if these can be identified and eliminated then there can be significant reductions in human labor.</p>
<p>The next incremental step up from RSO to image-based counting is therefore to accelerate the detection of animals on images. Deep Learning (DL) offers this possibility (<xref ref-type="bibr" rid="B44">Tuia et&#xa0;al., 2022</xref>). DL is a subgroup of artificial intelligence approach regrouping machine learning methods based on artificial neural networks, capable of learning and integrating multi-level representation from large datasets (<xref ref-type="bibr" rid="B27">LeCun et&#xa0;al., 2015</xref>). Significant progress has already been made in identifying a range of key species in Africa using DL-based object detectors and aerial imagery (<xref ref-type="bibr" rid="B22">Kellenberger et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B14">Eikelboom et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B29">Naud&#xe9; and Joubert, 2019</xref>; <xref ref-type="bibr" rid="B43">Torney et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B9">Delplanque et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B11">Delplanque et&#xa0;al., 2023</xref>). However, DL models produced biased counts because of their current high false positive rate, usually generated by animal-look-alike background objects. Thus, detections still need to be reviewed by humans. Furthermore, the field of animal detection in oblique aerial imagery is not yet as well developed as that of camera traps, where models trained on large and varied datasets are available for image (pre-)processing (<xref ref-type="bibr" rid="B41">Tabak et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B39">Shepley et&#xa0;al., 2021</xref>). At the moment, it is therefore often necessary to develop one&#x2019;s own model for application in a given protected area.</p>
<p>Being aware that current DL models need humans for prediction verification, we conducted a study to determine the potential of DL for reducing the interpretation workload of OCC surveys. We asked two specific questions:</p>
<list list-type="simple">
<list-item>
<p>(1) When the model detects animals in an image that we know are present, how well does it locate, count and identify them?</p>
</list-item>
<list-item>
<p>(2) For a &#x2018;practical&#x2019; evaluation to reduce interpretation, can the model discriminate correctly the images which do not contain animals?</p>
</list-item>
</list>
</sec>
<sec id="s2">
<label>2</label>
<title>Methods</title>
<p>We trained a DL model using annotations of a sample of images obtained in an SRF-OCC survey of Queen Elizabeth Protected Area in Uganda. These images had been previously visually interpreted to count animals, with the counts entered into a meta-database. An image could contain nothing and be a TN, or it could be a True Positive (TP) image with (for example) a single warthog, and/or 20 elephants and/or 100 Uganda kob. Having trained the DL model on a range of species from the annotated samples, we then tested the model on a realistic dataset, i.e. visually interpreted images that had not been used in the DL training, which contains both positive and negative images.</p>
<sec id="s2_1">
<label>2.1</label>
<title>Study area and dataset</title>
<p>The study area is the Queen Elizabeth Protected Area (QEPA) located in southwestern Uganda. The census zone included the Queen Elizabeth National Park and the contiguous Kyambura and Kigezi Wildlife Reserves, covering 2,560 km&#xb2; of bushed grassland, thicket, open woodlands and forest. Our study is based on aerial imagery acquired for a previous study of wildlife populations of QEPA, conducted in 2018. Only the information necessary for the understanding of the present paper is provided here, for more details the reader is referred to the study of <xref ref-type="bibr" rid="B24">Lamprey et&#xa0;al. (2023)</xref>.</p>
<p>High-resolution images were acquired using two 24-megapixel Nikon DSLR cameras obliquely mounted at 45&#xb0; through a camera hatch of a Cessna 182 aircraft. At 600 ft (183 m) above ground level coupled with an aircraft ground speed of 105 knots (194 km.hr<sup>&#x2212;1</sup>), a 2 second timing interval on cameras provided a continuous sample-strip of 150 m width on the ground (&#x2018;strip-width&#x2019;) with a 40% overlap between sequential images and frame footprint of 2.1 hectares. The cameras generated sequentially numbered images, stored in incremental folders on the camera cards. Flight transects were spaced at 1 km intervals and a total of 37,000 images were collected with Ground-Sampling Distance (GSD) 2.4 cm at the inner edge and 5.0 cm at the outer edge. These were manually interpreted by a team of four Ugandan interpreters during a six-week period. For each image, species name and numbers were recorded into a data spreadsheet. Where large herds spanned overlapping images, animals in the overlap area were counted into Even-Number Images (ENIs), while animals were counted in the center portion of Odd-Number Images (ONIs) to avoid any possibility of double counting. Therefore, ENIs contained total counts while ONIs contained partial counts (i.e. only the animals within the gaps between ENIs).</p>
<p>From the manual photo-interpretation, 12 wildlife and livestock species were detected: elephant (<italic>Loxodonta africana</italic>), buffalo (<italic>Syncerus caffer</italic>), topi (<italic>Damaliscus lunatus</italic> ssp. <italic>jimela</italic>), Uganda kob (<italic>Kobus kob</italic> ssp. <italic>thomasi</italic>), waterbuck (<italic>Kobus ellipsiprymnus</italic> ssp. <italic>defassa</italic>), warthog (<italic>Phacochoerus africanus</italic> ssp. <italic>massaicus</italic>), giant forest hog (<italic>Hylochoerus meinertzhageni</italic>), hippopotamus (<italic>Hippopotamus amphibius</italic>), crocodile (<italic>Crocodylus niloticus</italic>), cow (<italic>Bos taurus</italic>), sheep (<italic>Ovis aries</italic>) and goat (<italic>Capra hircus</italic>). Since the management of double counting is beyond the scope of this paper, only ENIs were selected. From all ENIs (18,833), approximately 70% (12,806) were randomly selected for creating annotations, used for training, validation and animal instance-based testing of the DL model, keeping the remaining 30% (6,027) for image-based model testing. Therefore two test sets were established to answer the 2 research questions: 1) the &#x2018;animal instance-based&#x2019; test set, where the annotated points are the ground truth; it was used to answer the first question, and 2) the &#x2018;image-based&#x2019; test set, containing less than 10% of positive images and more than 90% of negative images, where the species counts are the ground truth. This second test set served as a case study and was used to answer the second question.</p>
<p>The animal instance-based dataset was initially annotated as bounding boxes by a team of 4 experienced Ugandan interpreters, using VGG Image Annotator (<xref ref-type="bibr" rid="B12">Dutta and Zisserman, 2019</xref>). However, since point annotation has emerged as a faster and better alternative for the detection of animals with DL-based object detectors (<xref ref-type="bibr" rid="B10">Delplanque et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B11">Delplanque et&#xa0;al., 2023</xref>), pseudo-points were created by selecting the center of the bounding boxes. These pseudo-points were finally reviewed by an experienced annotator to obtain body-centered points, as the camera&#x2019;s viewing angle, animal pose or tightness of bounding box drawn may result in a point being outside the animal&#x2019;s body. This has been done using Label Studio software (<xref ref-type="bibr" rid="B42">Tkachenko et&#xa0;al., 2021</xref>). The images and points of the animal instance-based dataset were randomly split into training, validation and testing sets following a common allocation of 70%&#x2013;10%&#x2013;20% respectively, while taking the species numbers distribution into account (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>). Sheep and goat were amalgamated as a single class due to their great similarity in shape and color given the image resolution.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Details of the dataset split.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="bottom" rowspan="2" align="left">Number of</th>
<th valign="top" colspan="4" align="right">Animal instance-based dataset</th>
<th valign="top" align="right">Image-based dataset</th>
<th valign="bottom" rowspan="2" align="right">Probability of occurrence</th>
</tr>    <tr>
<th valign="top" align="right">Training</th>
<th valign="top" align="right">Validation</th>
<th valign="top" align="right">Test</th>
<th valign="top" align="right">Total</th>
<th valign="top" align="right">Test</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Elephant</td>
<td valign="top" align="right">406</td>
<td valign="top" align="right">58</td>
<td valign="top" align="right">116</td>
<td valign="top" align="right">580</td>
<td valign="top" align="right">299</td>
<td valign="top" align="right">7.6%</td>
</tr>
<tr>
<td valign="top" align="left">Buffalo</td>
<td valign="top" align="right">1,258</td>
<td valign="top" align="right">180</td>
<td valign="top" align="right">359</td>
<td valign="top" align="right">1,797</td>
<td valign="top" align="right">858</td>
<td valign="top" align="right">23.0%</td>
</tr>
<tr>
<td valign="top" align="left">Topi</td>
<td valign="top" align="right">172</td>
<td valign="top" align="right">10</td>
<td valign="top" align="right">43</td>
<td valign="top" align="right">225</td>
<td valign="top" align="right">118</td>
<td valign="top" align="right">3.0%</td>
</tr>
<tr>
<td valign="top" align="left">Kob</td>
<td valign="top" align="right">1,526</td>
<td valign="top" align="right">218</td>
<td valign="top" align="right">436</td>
<td valign="top" align="right">2,180</td>
<td valign="top" align="right">1,137</td>
<td valign="top" align="right">28.8%</td>
</tr>
<tr>
<td valign="top" align="left">Waterbuck</td>
<td valign="top" align="right">504</td>
<td valign="top" align="right">72</td>
<td valign="top" align="right">143</td>
<td valign="top" align="right">719</td>
<td valign="top" align="right">335</td>
<td valign="top" align="right">9.1%</td>
</tr>
<tr>
<td valign="top" align="left">Warthog</td>
<td valign="top" align="right">196</td>
<td valign="top" align="right">28</td>
<td valign="top" align="right">56</td>
<td valign="top" align="right">280</td>
<td valign="top" align="right">172</td>
<td valign="top" align="right">3.9%</td>
</tr>
<tr>
<td valign="top" align="left">Giant Forest Hog</td>
<td valign="top" align="right">27</td>
<td valign="top" align="right">5</td>
<td valign="top" align="right">8</td>
<td valign="top" align="right">40</td>
<td valign="top" align="right">25</td>
<td valign="top" align="right">0.6%</td>
</tr>
<tr>
<td valign="top" align="left">Hippopotamus</td>
<td valign="top" align="right">497</td>
<td valign="top" align="right">71</td>
<td valign="top" align="right">142</td>
<td valign="top" align="right">710</td>
<td valign="top" align="right">351</td>
<td valign="top" align="right">9.2%</td>
</tr>
<tr>
<td valign="top" align="left">Crocodile</td>
<td valign="top" align="right">14</td>
<td valign="top" align="right">2</td>
<td valign="top" align="right">4</td>
<td valign="top" align="right">20</td>
<td valign="top" align="right">16</td>
<td valign="top" align="right">0.3%</td>
</tr>
<tr>
<td valign="top" align="left">Cow</td>
<td valign="top" align="right">376</td>
<td valign="top" align="right">38</td>
<td valign="top" align="right">227</td>
<td valign="top" align="right">641</td>
<td valign="top" align="right">441</td>
<td valign="top" align="right">9.4%</td>
</tr>
<tr>
<td valign="top" align="left">Sheep/Goat</td>
<td valign="top" align="right">353</td>
<td valign="top" align="right">51</td>
<td valign="top" align="right">100</td>
<td valign="top" align="right">504</td>
<td valign="top" align="right">81</td>
<td valign="top" align="right">5.1%</td>
</tr>
<tr>
<td valign="top" align="left">24MP<sup>1</sup> positive images</td>
<td valign="top" align="right">717</td>
<td valign="top" align="right">95</td>
<td valign="top" align="right">200</td>
<td valign="top" align="right">1,012</td>
<td valign="top" align="right">494</td>
<td valign="top" align="right">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">24MP<sup>1</sup> negative images</td>
<td valign="top" align="right">0</td>
<td valign="top" align="right">0</td>
<td valign="top" align="right">0</td>
<td valign="top" align="right">11,778</td>
<td valign="top" align="right">5,533</td>
<td valign="top" align="right">&#x2013;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<sup>1</sup>MP, Megapixel.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Deep Learning model</title>
<p>Given its better performances in detecting and counting animals in oblique aerial imagery compared to common DL models, HerdNet (<xref ref-type="bibr" rid="B11">Delplanque et&#xa0;al., 2023</xref>) was chosen to process the dataset. Briefly, HerdNet is a single-stage point-based CNN consisting of two heads, one dedicated to the accurate localization of animals in the image (i.e., points), and the other to their classification, both trained in a pixel-wise manner using the Focal and the Cross-Entropy losses respectively. The training scheme was the same as that presented in <xref ref-type="bibr" rid="B11">Delplanque et&#xa0;al. (2023)</xref> and consisted of two steps: 1) training the architecture using positive patches only, and 2) harvesting and including Hard Negative Patches (HNPs) to further train the model in order to reduce the number of false positives. The patch size was set to 1,024 &#xd7; 1,024 pixels and following original paper values and early ablation studies, the hyperparameters were set as follows: the learning rate to 10<sup>&#x2212;5</sup>, the batch size to 2 and the number of epochs to 100. Horizontal flipping was used for data augmentation, using a 50% probability of occurrence and the Adam optimizer was used for neural network&#x2019;s parameters optimization. During testing, points were obtained by extracting local maxima from the pixel map produced by the localization head, in which a pixel value close to 1 indicates the presence of an animal. Each point was then used to pin the classification maps and obtain the associated class and confidence score. An image was considered as negative if the maximum pixel value of the localization map did not exceed 0.1. Each full-resolution test image was scanned in a moving-window fashion with a patch overlap set to 256 pixels. A radial distance threshold of 20 pixels was used to compare ground truths and detections during animal instance-based evaluation. 
Finally, only detections with confidence score above 50% were retained for image-based evaluation. For more details, the reader is referred to the reference paper. Operations were performed on a Windows-10 workstation using a 64 GB AMD Ryzen 9 5900X central processing unit (CPU) and an 8 GB NVIDIA GeForce RTX 3070 graphics processing unit (GPU).</p>
<p>HerdNet was evaluated in two ways: 1) The &#x2018;standard&#x2019; machine learning way, by calculating common detection metrics on the animal instance-based test set, containing positive images only; and 2) The &#x2018;practical&#x2019; way, by running the model on unseen images of the image-based test set, containing both negative and positive images, and comparing the DL model&#x2019;s counts with interpreters&#x2019; visual counts. Recall, precision, and F1 score were calculated for each species on the animal instance-based test set for the standard evaluation:</p>
<disp-formula>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where TP, FN, and FP are the numbers of true positives (i.e., exact detection and identification), false negatives (i.e., missed animals) and false positives (i.e., wrong detections), respectively.</p>
<p>Recall, also referred to as &#x2018;true positive rate&#x2019;, measures the proportion of animals correctly detected and identified by the model, while precision measures the proportion of true animals among all detections. The F1 score is the harmonic mean of these two metrics and is higher when recall and precision are balanced.</p>
<p>Concerning the practical evaluation on the image-based test set, only counting comparisons were made as no annotated points were available for calculating the above metrics. The true counting rate, representing the proportion of the human count found by the DL model, and the counting precision, representing the ratio of human count by DL model count, were calculated for each species.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Animal instance-based performance</title>
<p>All species combined, HerdNet reached 85% for recall, precision and F1 score, with little variation in performance according to distance from the aircraft (<xref ref-type="fig" rid="f1">
<bold>Figures&#xa0;1A, B</bold>
</xref>). Kob, buffalo, waterbuck and elephant were particularly well detected and located, as expressed by recall above 80% in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1C</bold>
</xref>. Hippopotamus and topi stood just after with a recall close to 60%, and the other species were much less detected. Except for the crocodile and the giant forest hog (i.e., minority species), the precision varied from 44 to 90%, meaning that the model produced respectively between 1.3 and 0.1 false positives per true positive. The least confused species were elephant, hippo and kob while the most confused were cow, warthog and topi. The highest confusions were between cow and buffalo and between topi and kob (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Animal instance-based detection performance of the DL model (HerdNet): <bold>(A)</bold> Example of model detection on a full oblique image, <bold>(B)</bold> model performance relative to the horizontal distance to the aircraft, and <bold>(C)</bold> species precision-recall curves.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fevo-11-1270857-g001.tif"/>
</fig>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Animal instance-based identification performance of the DL model (HerdNet). Each species was assigned a letter for referencing in the confusion matrix (bottom right): <bold>(A)</bold> Elephant, <bold>(B)</bold> buffalo, <bold>(C)</bold> topi, <bold>(D)</bold> kob, <bold>(E)</bold> waterbuck, <bold>(F)</bold> warthog, <bold>(G)</bold> giant forest hog, <bold>(H)</bold> hippopotamus, <bold>(I)</bold> crocodile, <bold>(J)</bold> cow, and <bold>(K)</bold> sheep/goat. The confusion matrix shows the comparison between the identification assigned during annotation by the human (&#x2018;Ground truth&#x2019;) and that predicted by the DL model (&#x2018;Model prediction&#x2019;).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fevo-11-1270857-g002.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Image-based performance</title>
<p>From the image-based test set of 6,027 images, the DL model correctly identified 81.1% of the negative images (4,486/5,533), thus reducing the manual interpretation workload by 74.4% (4,486/6,027). The same tendency was observed when applying the model to the whole set of ENIs: HerdNet identified 80.1% of the negative images (9,487/11,778), reducing the workload by 74.1% (9,487/12,806). In addition, it is worth mentioning that the DL model processed images on the workstation at a rate of about 2.8 seconds per 24-megapixel image, which corresponded to around 10 hours for the entire ENI dataset.</p>
<p>Focusing on detection by species, the model guides the interpreters to 95% or more of the animals for almost all the species studied except warthog, as expressed by the high detection rate in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>. Overall, the model detected 98.2% of animals previously identified in the original 2018 count by interpreters. Meanwhile, the counting precision of the model was low overall at &lt;50%, but was reasonable for elephant (50.1%) and buffalo (54.1%), and high for topi (92.9%) and cow (90%).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Results of the DL model (HerdNet) on the image-based test images (N=6,027).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Species</th>
<th valign="top" align="right">N<sub>H</sub>
<sup>1</sup>
</th>
<th valign="top" align="right">N<sub>H|M</sub>
<sup>2</sup>
</th>
<th valign="top" align="right">N<sub>M</sub>
<sup>3</sup>
</th>
<th valign="top" align="right">N<sub>H|M</sub>/N<sub>H</sub>
<sup>4</sup>
</th>
<th valign="top" align="right">N<sub>H</sub>/N<sub>M</sub>
<sup>5</sup>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Elephant</td>
<td valign="top" align="right">299 (65)</td>
<td valign="top" align="right">292 (58)</td>
<td valign="top" align="right">597 (313)</td>
<td valign="top" align="right">97.7%</td>
<td valign="top" align="right">50.1%</td>
</tr>
<tr>
<td valign="top" align="left">Buffalo</td>
<td valign="top" align="right">858 (51)</td>
<td valign="top" align="right">852 (46)</td>
<td valign="top" align="right">1,587 (527)</td>
<td valign="top" align="right">99.3%</td>
<td valign="top" align="right">54.1%</td>
</tr>
<tr>
<td valign="top" align="left">Topi</td>
<td valign="top" align="right">118 (16)</td>
<td valign="top" align="right">117 (15)</td>
<td valign="top" align="right">127 (44)</td>
<td valign="top" align="right">99.0%</td>
<td valign="top" align="right">92.9%</td>
</tr>
<tr>
<td valign="top" align="left">Kob</td>
<td valign="top" align="right">1,137 (152)</td>
<td valign="top" align="right">1,137 (152)</td>
<td valign="top" align="right">4,092 (1,706)</td>
<td valign="top" align="right">100.0%</td>
<td valign="top" align="right">27.8%</td>
</tr>
<tr>
<td valign="top" align="left">Waterbuck</td>
<td valign="top" align="right">335 (100)</td>
<td valign="top" align="right">329 (96)</td>
<td valign="top" align="right">1,348 (841)</td>
<td valign="top" align="right">98.2%</td>
<td valign="top" align="right">24.9%</td>
</tr>
<tr>
<td valign="top" align="left">Warthog</td>
<td valign="top" align="right">172 (61)</td>
<td valign="top" align="right">143 (46)</td>
<td valign="top" align="right">701 (514)</td>
<td valign="top" align="right">83.1%</td>
<td valign="top" align="right">24.5%</td>
</tr>
<tr>
<td valign="top" align="left">Giant Forest Hog</td>
<td valign="top" align="right">25 (8)</td>
<td valign="top" align="right">25 (8)</td>
<td valign="top" align="right">55 (45)</td>
<td valign="top" align="right">100.0%</td>
<td valign="top" align="right">45.5%</td>
</tr>
<tr>
<td valign="top" align="left">Hippopotamus</td>
<td valign="top" align="right">351 (60)</td>
<td valign="top" align="right">332 (49)</td>
<td valign="top" align="right">1,468 (508)</td>
<td valign="top" align="right">94.6%</td>
<td valign="top" align="right">23.9%</td>
</tr>
<tr>
<td valign="top" align="left">Crocodile</td>
<td valign="top" align="right">16 (3)</td>
<td valign="top" align="right">16 (3)</td>
<td valign="top" align="right">97 (85)</td>
<td valign="top" align="right">100.0%</td>
<td valign="top" align="right">16.5%</td>
</tr>
<tr>
<td valign="top" align="left">Cow</td>
<td valign="top" align="right">441 (19)</td>
<td valign="top" align="right">440 (18)</td>
<td valign="top" align="right">490 (109)</td>
<td valign="top" align="right">99.8%</td>
<td valign="top" align="right">90.0%</td>
</tr>
<tr>
<td valign="top" align="left">Sheep/Goat</td>
<td valign="top" align="right">81 (14)</td>
<td valign="top" align="right">81 (14)</td>
<td valign="top" align="right">994 (515)</td>
<td valign="top" align="right">100.0%</td>
<td valign="top" align="right">8.1%</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<sup>1</sup>Animal numbers in images as determined by human counts (&#x2018;interpretation&#x2019;) in survey year 2018.</p>
</fn>
<fn>
<p>
<sup>2</sup>Animal numbers in images from 2018 interpretation, where these images were later classified as animal-positive by the DL model.</p>
</fn>
<fn>
<p>
<sup>3</sup>Numbers estimated by the DL model; indicating the &#x2018;overcount&#x2019; by the DL model.</p>
</fn>
<fn>
<p>
<sup>4</sup>True counting rate of the model; the proportion of the 2018 count found by the DL model.</p>
</fn>
<fn>
<p>
<sup>5</sup>Counting precision of the DL model, where 1/precision is the ratio of the overcount.</p>
</fn>
<fn>
<p>The absolute numbers indicated correspond to the number of animals detected, followed by the number of images that contained the species in parentheses.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In the context of improving multi-species SRF surveys in Africa, we trained a DL model based on aerial imagery of a Ugandan protected area acquired under standardized criteria for OCC surveys, specifically pixel density, camera angles, image footprint size and ground-sampling distance. Our DL model detected human-identified wildlife in positive images at high recall and precision rates (85%). It showed equivalent or better performance than previous DL models developed in similar conditions or habitats (<xref ref-type="bibr" rid="B22">Kellenberger et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B14">Eikelboom et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B9">Delplanque et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B11">Delplanque et&#xa0;al., 2023</xref>). The CNN used here (i.e., HerdNet) revealed better performance than the study of the original paper (<xref ref-type="bibr" rid="B11">Delplanque et&#xa0;al., 2023</xref>). This may be explained by the higher resolution of current images and their more controlled and standardized acquisition, which should allow for better differentiation of animals in the landscape and within herds and reduced scale variation among individuals.</p>
<p>As previously observed, our model struggles to detect minority species (i.e., crocodile and giant forest hog) certainly due to a lack of training samples for the CNN to develop robust features. In addition, the inherently small test sample sizes for these species reduce the statistical credibility of the performance. Some of the species showed a low recall while they do not seem more challenging to detect at first sight. This is explained by the difficulty for the model to differentiate visually related species, causing confusion between detected animals. As an example, cow and topi seemed to be poorly detected, but their recall may rise from 47% to 83%, and from 63% to 100% respectively, considering the entire group of confused detected animals (i.e. amalgamated cow and topi). Thus, majority species weighting appears to confuse identification of look-alike species (e.g., cow-to-buffalo confusion). In fact, this phenomenon is common in object detection tasks and is related to &#x2018;foreground&#x2013;foreground class imbalance&#x2019; (<xref ref-type="bibr" rid="B32">Oksuz et&#xa0;al., 2020</xref>), inherent to the imbalance of object frequencies in nature. Future research should investigate other approaches such as efficient sampling strategies, progressive fine-tuning or generative methods (e.g., <xref ref-type="bibr" rid="B45">Wang et&#xa0;al., 2017</xref>) to reduce such bias.</p>
<p>We were surprised by the low detection performance of sheep/goat, considering the good results of previous studies involving these species (<xref ref-type="bibr" rid="B37">Sarwar et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B11">Delplanque et&#xa0;al., 2023</xref>). We suspect that the use of the HNP mining method during training degraded the sheep/goat detection ability of the model. In this area in Uganda, sheep and goats were mostly found in the villages, where they are not herded (as in arid lands) but roam in small groups around households; villages were a major source of false positives due to the particular bright items found in them, appearing as &#x2018;white shapes&#x2019; of various sizes. Training the model to discard these sheep- or goat-like objects certainly confused the model, as expressed by the 25% drop in recall obtained on the validation set after the second training step.</p>
<p>HerdNet thus correctly detects and counts our studied species in positive images, but what about its performance on a realistic dataset, i.e. containing less than 10% of positive images and more than 90% of negative images? We observed that our DL model succeeded in guiding interpreters to 98.2% of the animals (all species combined). It discriminated more than 80% of negative images, reducing the workload of manual interpretation by 74%. Nevertheless, the counting performance is not yet satisfactory as the model tended to overestimate the true number of animals. This is the result of a high number of false positives, typically generated by unknown or animal-like landscape items such as particular shapes of trunks, shadows, rocks, termite mounds and mud. This model behavior was expected as such landscape items have previously been shown to be the main cause of false positives (<xref ref-type="bibr" rid="B22">Kellenberger et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B9">Delplanque et&#xa0;al., 2021</xref>). Precision could be improved by properly re-training the DL model on these particular landscape items, following a short human verification session.</p>
<p>At this time, a sufficiently large annotated wildlife training dataset acquired from the target area, or from areas with the same wildlife species, is required to process all the image data. This training and verification can be accelerated by using point detections, because adding, deleting or moving points is much faster than adjusting bounding boxes, which makes our model more appropriate for processing aerial survey images.</p>
<p>Our results confirm and validate that we have entered the era of using DL as a tool to semi-automatically process aerial survey wildlife imagery acquired under standard SRF conditions, with demonstrated effectiveness to reduce human interpretation workloads by over 70%. Humans must remain in the process to study positive images, as filtered by the DL model. Annotated image databases and models will also improve with each new acquisition, and we can therefore anticipate a growing improvement in DL models. Current counting standards such as CITES-MIKE V3 (<xref ref-type="bibr" rid="B7">CITES-MIKE, 2019</xref>) can now evolve further to prescribe image-based animal detection based on a combination of manual interpretation and high-performance DL models. Following surveys can invest in increased sampling effort, as the DL model is insensitive to fatigue unlike humans. This can be effected by increasing sampling strip widths, flying higher and using higher resolution cameras, such as the new generation of 40&#x2013;60 MP mirrorless cameras (<xref ref-type="bibr" rid="B25">Lamprey et&#xa0;al., 2020</xref>). On one hand, this would allow for the transfer of the observers&#x2019; real-time visual counting work to the verification of the model detections. On the other hand, this would decrease the human-life risks associated with traditional aerial surveys while increasing the sampling effort at no extra costs.</p>
<p>In our study we have emphasized the potential use of DL for detection in strip transects. However, the method also has potential for detection in line transects where the population is calculated from a function of the drop-off of observations with distance from a line defined to the side of the aircraft (<xref ref-type="bibr" rid="B13">Eberhardt, 1978</xref>; <xref ref-type="bibr" rid="B3">Buckland et&#xa0;al., 2004</xref>). To date, problems in measuring distance to aircraft, together with meeting a key assumption of 100% animal detection by observers on the line itself, have precluded the wide use of line transects in Africa (<xref ref-type="bibr" rid="B23">Kruger et&#xa0;al., 2008</xref>). However, where pixel position can define the distance from the aircraft, and detection through DL is improved, our approach has the capability to greatly enhance line-transect counts.</p>
<p>Next work will consist of manually verifying detections and producing population estimates. This will enable us to assess the performance of our semi-automated detection model at the scale of an entire aerial survey. On a more general scale, it would be important to develop efficient semi-automated approaches to process large volumes of aerial survey images, integrating Deep Learning and humans with minimal verification time investment, to ensure accurate and precise derived estimates.</p>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary files; further inquiries can be directed to the corresponding author/s.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>AD: Conceptualization, Data curation, Formal Analysis, Methodology, Software, Writing &#x2013; original draft. RL: Conceptualization, Data curation, Formal Analysis, Methodology, Writing &#x2013; original draft. SF: Writing &#x2013; review &amp; editing. JT: Writing &#x2013; review &amp; editing. PL: Supervision, Writing &#x2013; review &amp; editing.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. The work of A. Delplanque was supported under a grant from the Fund for Research Training in Industry and Agriculture (FRIA, F.R.S.-FNRS). The annotation work was supported under a grant from Global Conservation of California, USA to R. Lamprey and the WildSpace-Image-Analytics team of Uganda (<ext-link ext-link-type="uri" xlink:href="www.wildspace-image-analytics.com">www.wildspace-image-analytics.com</ext-link>). Sharing of annotation data was conducted under a Memorandum of Understanding between Uganda Conservation Foundation (UCF) and the University of Liege. The original aerial 2018 survey of QENP, from which this experimental imagery was collected, was funded by UCF with support from Global Conservation, Vulcan Inc., Save the Elephants and the Uganda Wildlife Authority.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We are grateful to Jeff Morgan of Global Conservation and Mike Keigwin of the Uganda Conservation Foundation who supported R. Lamprey and the WildSpace-Image-Analytics team in Uganda in conducting the first-stage image annotation work of this project. We would like to thank the Uganda Wildlife Authority for their assistance in the original QEPA aerial survey of 2018, and especially to Mr Charles Tumwesigye, Director of Conservation at UWA, who kindly obtained the necessary authorizations.</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that this study received funding from the not-for-profit organizations Uganda Conservation Foundation, Global Conservation, Save the Elephants, Vulcan Inc. and Uganda Wildlife Authority. The funders were not involved in the study design, collection, analysis, interpretation of data, the writing of this article or the decision to submit it for publication.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Br&#xf6;ker</surname> <given-names>K. C. A.</given-names>
</name>
<name>
<surname>Hansen</surname> <given-names>R. G.</given-names>
</name>
<name>
<surname>Leonard</surname> <given-names>K. E.</given-names>
</name>
<name>
<surname>Koski</surname> <given-names>W. R.</given-names>
</name>
<name>
<surname>Heide-J&#xf8;rgensen</surname> <given-names>M. P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A comparison of image and observer based aerial surveys of narwhal</article-title>. <source>Mar. Mammal Sci.</source> <volume>35</volume> (<issue>4</issue>), <fpage>1253</fpage>&#x2013;<lpage>1279</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/mms.12586</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brummitt</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Regan</surname> <given-names>E. C.</given-names>
</name>
<name>
<surname>Weatherdon</surname> <given-names>L. V.</given-names>
</name>
<name>
<surname>Martin</surname> <given-names>C. S.</given-names>
</name>
<name>
<surname>Geijzendorffer</surname> <given-names>I. R.</given-names>
</name>
<name>
<surname>Rocchini</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Taking stock of nature: Essential biodiversity variables explained</article-title>. <source>Biol. Conserv.</source> <volume>213</volume>, <fpage>252</fpage>&#x2013;<lpage>255</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.biocon.2016.09.006</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Buckland</surname> <given-names>S. T.</given-names>
</name>
<name>
<surname>Anderson</surname> <given-names>D. R.</given-names>
</name>
<name>
<surname>Burnham</surname> <given-names>K. P.</given-names>
</name>
<name>
<surname>Laake</surname> <given-names>J. L.</given-names>
</name>
<name>
<surname>Borchers</surname> <given-names>D. L.</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2004</year>). <source>Advanced distance sampling.</source> (<publisher-loc>Oxford, United Kingdom</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>).</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Caughley</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>1974</year>). <article-title>Bias in aerial survey</article-title>. <source>J. Wildl. Manage.</source> <volume>38</volume> (<issue>4</issue>), <fpage>921</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/3800067</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Caughley</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>1977</year>). <article-title>Sampling in aerial survey</article-title>. <source>J. Wildl. Manage.</source> <volume>41</volume> (<issue>4</issue>), <fpage>605</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/3799980</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Caughley</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Goddard</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>1975</year>). <article-title>Abundance and distribution of elephants in the Luangwa Valley, Zambia</article-title>. <source>Afr. J. Ecol.</source> <volume>13</volume> (<issue>1</issue>), <fpage>39</fpage>&#x2013;<lpage>48</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1365-2028.1975.tb00122.x</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>CITES-MIKE</collab>
</person-group> (<year>2019</year>). <source>Monitoring the Illegal Killing of Elephants: Aerial Survey Standards for the MIKE Programme. Version 3.0. Convention on International Trade in Endangered Species - Monitoring the Illegal Killing of Elephants Programme (CITES-MIKE)</source> (<publisher-loc>Nairobi, Kenya</publisher-loc>: <publisher-name>United Nations Environment Programme</publisher-name>).</citation>
</ref>
<ref id="B8">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Craig</surname> <given-names>G. C.</given-names>
</name>
</person-group> (<year>2012</year>). <source>Aerial survey standards for the CITES-MIKE Programme, Version 2</source> (<publisher-loc>UNEP/DELC, Nairobi, Kenya</publisher-loc>: <publisher-name>CITES-MIKE Programme</publisher-name>).</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Delplanque</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Foucher</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lejeune</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Linchant</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Th&#xe9;au</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Multispecies detection and identification of African mammals in aerial imagery using convolutional neural networks</article-title>. In: <source>Remote Sens. Ecol. Conserv.</source> <volume>8</volume> (<issue>2</issue>), <fpage>pp.166</fpage>&#x2013;<lpage>pp.179</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/rse2.234</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Delplanque</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Foucher</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lejeune</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Th&#xe9;au</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Counting African mammal herds in aerial imagery using deep learning: are anchor-based algorithms the most suitable</article-title>?,&#x201d; in <conf-name>10th International Conference on Agro-Geoinformatics and 43rd Canadian Symposium on Remote Sensing</conf-name>, <conf-loc>Qu&#xe9;bec, Canada</conf-loc>. <conf-date>14 July 2022</conf-date>. <uri xlink:href="https://hdl.handle.net/2268/293320">https://hdl.handle.net/2268/293320</uri>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Delplanque</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Foucher</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Th&#xe9;au</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Bussi&#xe8;re</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Vermeulen</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Lejeune</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>From crowd to herd counting: how to precisely detect and count African mammals using aerial imagery and deep learning</article-title>? <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>197</volume>, <fpage>167</fpage>&#x2013;<lpage>180</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isprsjprs.2023.01.025</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Dutta</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Zisserman</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>The VIA annotation software for images, audio and video</article-title>,&#x201d; in <conf-name>Proceedings of the 27th ACM International Conference on Multimedia</conf-name>, <conf-loc>New York, USA</conf-loc>. (<publisher-name>Association for Computing Machinery</publisher-name>, <publisher-loc>New York (United States of America)</publisher-loc>), p.<fpage>2276</fpage>&#x2013;<lpage>2279</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/3343031.3350535</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eberhardt</surname> <given-names>L. L.</given-names>
</name>
</person-group> (<year>1978</year>). <article-title>Transect methods for population studies</article-title>. <source>J. Wildl. Manage.</source> <volume>42</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>31</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/3800685</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eikelboom</surname> <given-names>J. A. J.</given-names>
</name>
<name>
<surname>Wind</surname> <given-names>J.</given-names>
</name>
<name>
<surname>van de Ven</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Kenana</surname> <given-names>L. M.</given-names>
</name>
<name>
<surname>Schroder</surname> <given-names>B.</given-names>
</name>
<name>
<surname>de Knegt</surname> <given-names>H. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Improving the precision and accuracy of animal population estimates with aerial image object detection</article-title>. <source>Methods Ecol. Evol.</source> <volume>10</volume> (<issue>11</issue>), <fpage>1875</fpage>&#x2013;<lpage>1887</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/2041-210x.13277</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fleming</surname> <given-names>P. J. S.</given-names>
</name>
<name>
<surname>Tracey</surname> <given-names>J. P.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Some human, aircraft and animal factors affecting aerial surveys: how to enumerate animals from the air</article-title>. <source>Wildl. Res.</source> <volume>35</volume> (<issue>4</issue>), <fpage>258</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1071/wr07081</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Grimsdell</surname> <given-names>J. J. R.</given-names>
</name>
<name>
<surname>Westley</surname> <given-names>S. B.</given-names>
</name>
</person-group> (<year>1981</year>). <source>Low-level aerial survey techniques</source>. ILCA Monograph 4 (<publisher-loc>Addis Ababa (Ethiopia)</publisher-loc>: <publisher-name>International Livestock Centre for Africa</publisher-name>).</citation>
</ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Gwynne</surname> <given-names>M. D.</given-names>
</name>
<name>
<surname>Croze</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>1975</year>). &#x201c;<article-title>East African habitat monitoring practice: a review of methods and application</article-title>,&#x201d; in <conf-name>Evaluation and Mapping of Tropical African Rangelands; Proceedings of the Seminar in Bamako</conf-name>, <conf-loc>Mali</conf-loc>. (<publisher-name>International Livestock Centre for Africa, Addis Ababa (Ethiopia)</publisher-name>), <fpage>95</fpage>&#x2013;<lpage>136</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jachmann</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2001</year>). <source>Estimating the abundance of African wildlife</source> (<publisher-loc>New York United States of America</publisher-loc>: <publisher-name>Springer Science &amp; Business Media</publisher-name>).</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jachmann</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Comparison of aerial counts with ground counts for large African herbivores</article-title>. <source>J. Appl. Ecol.</source> <volume>39</volume> (<issue>5</issue>), <fpage>841</fpage>&#x2013;<lpage>852</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1046/j.1365-2664.2002.00752.x</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jetz</surname> <given-names>W.</given-names>
</name>
<name>
<surname>McGeoch</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Guralnick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Ferrier</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Beck</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Costello</surname> <given-names>M. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Essential biodiversity variables for mapping and monitoring species populations</article-title>. <source>Nat. Ecol. Evol.</source> <volume>3</volume> (<issue>4</issue>), <fpage>539</fpage>&#x2013;<lpage>551</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41559-019-0826-1</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jolly</surname> <given-names>G. M.</given-names>
</name>
</person-group> (<year>1969</year>). <article-title>Sampling methods for aerial censuses of wildlife populations</article-title>. <source>East Afr. Agric. Forest. J.</source> <volume>34</volume> (<issue>1</issue>), <fpage>46</fpage>&#x2013;<lpage>49</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/00128325.1969.11662347</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kellenberger</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Marcos</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Tuia</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Detecting mammals in UAV images: Best practices to address a substantially imbalanced dataset with deep learning</article-title>. <source>Remote Sens. Environ.</source> <volume>216</volume>, <fpage>139</fpage>&#x2013;<lpage>153</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rse.2018.06.028</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kruger</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Reilly</surname> <given-names>B. K.</given-names>
</name>
<name>
<surname>Whyte</surname> <given-names>I. J.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Application of distance sampling to estimate population densities of large herbivores in Kruger National Park</article-title>. <source>Wildl. Res.</source> <volume>35</volume> (<issue>4</issue>), <fpage>371</fpage>&#x2013;<lpage>376</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1071/WR07084</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Lamprey</surname> <given-names>R. H.</given-names>
</name>
<name>
<surname>Keigwin</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Tumwesigye</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A high-resolution aerial camera survey of Uganda&#x2019;s Queen Elizabeth Protected Area improves detection of wildlife and delivers a surprisingly high estimate of the elephant population</article-title> (Accessed <access-date>February 15, 2023</access-date>).</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lamprey</surname> <given-names>R. H.</given-names>
</name>
<name>
<surname>Ochanda</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Brett</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Tumwesigye</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Douglas-Hamilton</surname> <given-names>I.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Cameras replace human observers in multi-species aerial counts in Murchison Falls, Uganda</article-title>. <source>Remote Sens. Ecol. Conserv.</source> <volume>6</volume>, <fpage>529</fpage>&#x2013;<lpage>545</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/rse2.154</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lamprey</surname> <given-names>R. H.</given-names>
</name>
<name>
<surname>Pope</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Ngene</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Norton-Griffiths</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Frederick</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Okita-Ouma</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Comparing an automated high-definition oblique camera system to rear-seat-observers in a wildlife survey in Tsavo, Kenya: Taking multi-species aerial counts to the next level</article-title>. <source>Biol. Conserv.</source> <volume>241</volume>, <fpage>108243</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.biocon.2019.108243</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hinton</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume> (<issue>7553</issue>), <fpage>436</fpage>&#x2013;<lpage>444</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lethbridge</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Stead</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wells</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Estimating kangaroo density by aerial survey: A comparison of thermal cameras with human observers</article-title>. <source>Wildl. Res.</source> <volume>46</volume> (<issue>8</issue>), <fpage>639</fpage>&#x2013;<lpage>648</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1071/WR18122</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Naud&#xe9;</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Joubert</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>The aerial elephant dataset: A new public benchmark for aerial object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops</conf-name>, (<publisher-name>Institute for Electrical and Electronics Engineers (IEEE)</publisher-name>, <publisher-loc>New York City (United States of America)</publisher-loc>), pp. <fpage>48</fpage>&#x2013;<lpage>55</lpage>.</citation>
</ref>
<ref id="B30">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Norton-Griffiths</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>1978</year>). <source>Counting animals</source> (<publisher-loc>Washington DC</publisher-loc>: <publisher-name>African Wildlife Foundation Handbook Number 1</publisher-name>).</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ogutu</surname> <given-names>J. O.</given-names>
</name>
<name>
<surname>Piepho</surname> <given-names>H. P.</given-names>
</name>
<name>
<surname>Said</surname> <given-names>M. Y.</given-names>
</name>
<name>
<surname>Ojwang</surname> <given-names>G. O.</given-names>
</name>
<name>
<surname>Njino</surname> <given-names>L. W.</given-names>
</name>
<name>
<surname>Kifugo</surname> <given-names>S. C.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>Extreme wildlife declines and concurrent increase in livestock numbers in Kenya: What are the causes</article-title>? <source>PloS One</source> <volume>11</volume> (<issue>9</issue>), <fpage>1</fpage>&#x2013;<lpage>46</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0163249</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oksuz</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Cam</surname> <given-names>B. C.</given-names>
</name>
<name>
<surname>Kalkan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Akbas</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Imbalance problems in object detection: A review</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>43</volume> (<issue>10</issue>), <fpage>3388</fpage>&#x2013;<lpage>3415</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TPAMI.2020.2981890</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ottichilo</surname> <given-names>W. K.</given-names>
</name>
<name>
<surname>Khaemba</surname> <given-names>W. M.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Validation of observer and aircraft calibration for aerial surveys of animals</article-title>. <source>Afr. J. Ecol.</source> <volume>39</volume> (<issue>1</issue>), <fpage>45</fpage>&#x2013;<lpage>50</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1046/j.1365-2028.2001.00268.x</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>PAEAS</collab>
</person-group> (<year>2014</year>). <source>Aerial survey standards and guidelines for the Pan-African Elephant Aerial Survey</source> (<publisher-loc>Seattle, USA</publisher-loc>: <publisher-name>Vulcan Inc</publisher-name>).</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Liao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Shao</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yue</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Wild animal survey using UAS imagery and deep learning: modified Faster R-CNN for kiang detection in Tibetan Plateau</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>169</volume>, <fpage>364</fpage>&#x2013;<lpage>376</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isprsjprs.2020.08.026</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pennycuick</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Western</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>1969</year>). <article-title>An investigation of some sources of bias in aerial transect sampling of large mammal populations</article-title>. <source>Afr. J. Ecol.</source> <volume>10</volume> (<issue>3</issue>), <fpage>175</fpage>&#x2013;<lpage>191</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1365-2028.1972.tb00726.x</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sarwar</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Griffin</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Rehman</surname> <given-names>S. U.</given-names>
</name>
<name>
<surname>Pasang</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Detecting sheep in UAV images</article-title>. <source>Comput. Electron. Agric.</source> <volume>187</volume>, <elocation-id>106219</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106219</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schlossberg</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chase</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Griffin</surname> <given-names>C. R.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Testing the accuracy of aerial surveys for large mammals: An experiment with African savanna elephants (Loxodonta africana)</article-title>. <source>PloS One</source> <volume>11</volume> (<issue>10</issue>), <elocation-id>e0164904</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0164904</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shepley</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Falzon</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Meek</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Kwan</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Automated location invariant animal detection in camera trap images using publicly available data sources</article-title>. <source>Ecol. Evol.</source> <volume>11</volume>, <fpage>4494</fpage>&#x2013;<lpage>4506</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ece3.7344</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Stelfox</surname> <given-names>J. G.</given-names>
</name>
<name>
<surname>Peden</surname> <given-names>D. G.</given-names>
</name>
</person-group> (<year>1981</year>). &#x201c;<article-title>The aerial survey programme of the Kenya Rangeland Ecological Monitoring Unit</article-title>,&#x201d; in <source>Low-Level Aerial Survey Techniques</source>. Eds. <person-group person-group-type="editor">
<name>
<surname>Grimsdell</surname> <given-names>J. J. R.</given-names>
</name>
<name>
<surname>Westley</surname> <given-names>S. B.</given-names>
</name>
</person-group> (<publisher-loc>Addis Ababa, Ethiopia</publisher-loc>: <publisher-name>ILCA Monographs 4</publisher-name>), <fpage>69</fpage>&#x2013;<lpage>83</lpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tabak</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Norouzzadeh</surname> <given-names>M. S.</given-names>
</name>
<name>
<surname>Wolfson</surname> <given-names>D. W.</given-names>
</name>
<name>
<surname>Sweeney</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>VerCauteren</surname> <given-names>K. C.</given-names>
</name>
<name>
<surname>Snow</surname> <given-names>N. P.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Machine learning to classify animal species in camera trap images: Applications in ecology</article-title>. <source>Methods Ecol. Evol.</source> <volume>10</volume> (<issue>4</issue>), <fpage>585</fpage>&#x2013;<lpage>590</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/2041-210X.13120</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Tkachenko</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Malyuk</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Shevchenko</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Holmanyuk</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Liubimov</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>) <source>Label Studio: Data labeling software [Python]</source>. Available at: <uri xlink:href="https://github.com/heartexlabs/label-studio">https://github.com/heartexlabs/label-studio</uri>.</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Torney</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Lloyd-Jones</surname> <given-names>D. J.</given-names>
</name>
<name>
<surname>Chevallier</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Moyer</surname> <given-names>D. C.</given-names>
</name>
<name>
<surname>Maliti</surname> <given-names>H. T.</given-names>
</name>
<name>
<surname>Mwita</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>A comparison of deep learning and citizen science techniques for counting wildlife in aerial survey images</article-title>. <source>Methods Ecol. Evol.</source> <volume>10</volume> (<issue>6</issue>), <fpage>779</fpage>&#x2013;<lpage>787</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/2041-210X.13165</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tuia</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Kellenberger</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Beery</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Costelloe</surname> <given-names>B. R.</given-names>
</name>
<name>
<surname>Zuffi</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Risse</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Perspectives in machine learning for wildlife conservation</article-title>. <source>Nat. Commun.</source> <volume>13</volume> (<issue>1</issue>), <fpage>792</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-022-27980-y</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Shrivastava</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gupta</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>A-fast-rcnn: Hard positive generation via adversary for object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>New York City, United States of America</publisher-loc>: <publisher-name>Institute for Electrical and Electronics Engineers (IEEE)</publisher-name>), <fpage>2606</fpage>&#x2013;<lpage>2615</lpage>.</citation>
</ref>
</ref-list>
</back>
</article>