<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<?covid-19-tdm?>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Comput. Sci.</journal-id>
<journal-title>Frontiers in Computer Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Comput. Sci.</abbrev-journal-title>
<issn pub-type="epub">2624-9898</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">641237</article-id>
<article-id pub-id-type="doi">10.3389/fcomp.2021.641237</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Computer Science</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>The CoronaSurveys System for COVID-19 Incidence Data Collection and Processing</article-title>
<alt-title alt-title-type="left-running-head">Baquero et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">CoronaSurveys System for COVID-19 Monitoring</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Baquero</surname>
<given-names>Carlos</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1212645/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Casari</surname>
<given-names>Paolo</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1341732/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Fernandez Anta</surname>
<given-names>Antonio</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1043233/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Garc&#xed;a-Garc&#xed;a</surname>
<given-names>Amanda</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1354216/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Frey</surname>
<given-names>Davide</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Garcia-Agundez</surname>
<given-names>Augusto</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/580997/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Georgiou</surname>
<given-names>Chryssis</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1277386/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Girault</surname>
<given-names>Benjamin</given-names>
</name>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ortega</surname>
<given-names>Antonio</given-names>
</name>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Goessens</surname>
<given-names>Mathieu</given-names>
</name>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hern&#xe1;ndez-Roig</surname>
<given-names>Harold A.</given-names>
</name>
<xref ref-type="aff" rid="aff9">
<sup>9</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1246613/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nicolaou</surname>
<given-names>Nicolas</given-names>
</name>
<xref ref-type="aff" rid="aff10">
<sup>10</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1067077/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Stavrakis</surname>
<given-names>Efstathios</given-names>
</name>
<xref ref-type="aff" rid="aff10">
<sup>10</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1324317/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ojo</surname>
<given-names>Oluwasegun</given-names>
</name>
<xref ref-type="aff" rid="aff11">
<sup>11</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1354136/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Roberts</surname>
<given-names>Julian C.</given-names>
</name>
<xref ref-type="aff" rid="aff12">
<sup>12</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1270528/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sanchez</surname>
<given-names>Ignacio</given-names>
</name>
<xref ref-type="aff" rid="aff13">
<sup>13</sup>
</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<label>
<sup>1</sup>
</label>U. Minho and INESC TEC, <addr-line>Braga</addr-line>, <country>Portugal</country>
</aff>
<aff id="aff2">
<label>
<sup>2</sup>
</label>Department of Information Engineering and Computer Science, University of Trento, <addr-line>Trento</addr-line>, <country>Italy</country>
</aff>
<aff id="aff3">
<label>
<sup>3</sup>
</label>IMDEA Networks Institute, <addr-line>Madrid</addr-line>, <country>Spain</country>
</aff>
<aff id="aff4">
<label>
<sup>4</sup>
</label>Inria Rennes, <addr-line>Rennes</addr-line>, <country>France</country>
</aff>
<aff id="aff5">
<label>
<sup>5</sup>
</label>Multimedia Communications Lab, TU Darmstadt, <addr-line>Darmstadt</addr-line>, <country>Germany</country>
</aff>
<aff id="aff6">
<label>
<sup>6</sup>
</label>Department of Computer Science, University of Cyprus, <addr-line>Nicosia</addr-line>, <country>Cyprus</country>
</aff>
<aff id="aff7">
<label>
<sup>7</sup>
</label>Department of Electrical and Computer Engineering, University of Southern California, <addr-line>Los Angeles</addr-line>, <addr-line>CA</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff8">
<label>
<sup>8</sup>
</label>Consulting, <addr-line>Rennes</addr-line>, <country>France</country>
</aff>
<aff id="aff9">
<label>
<sup>9</sup>
</label>Department of Statistics, UC3M &#x26; UC3M-Santander Big Data Institute, <addr-line>Getafe</addr-line>, <country>Spain</country>
</aff>
<aff id="aff10">
<label>
<sup>10</sup>
</label>Algolysis Ltd, <addr-line>Nicosia</addr-line>, <country>Cyprus</country>
</aff>
<aff id="aff11">
<label>
<sup>11</sup>
</label>IMDEA Networks Institute and UC3M, <addr-line>Madrid</addr-line>, <country>Spain</country>
</aff>
<aff id="aff12">
<label>
<sup>12</sup>
</label>Skyhaven Media, <addr-line>Liverpool</addr-line>, <country>United&#x20;Kingdom</country>
</aff>
<aff id="aff13">
<label>
<sup>13</sup>
</label>InqBarna, <addr-line>Barcelona</addr-line>, <country>Spain</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/305341/overview">Mina C. Johnson-Glenberg</ext-link>, Arizona State University, United&#x20;States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/705828/overview">Andrea Seveso</ext-link>, University of Milano-Bicocca, Italy</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/756877/overview">Jan Mucha</ext-link>, Brno University of Technology, Czechia</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/187680/overview">Juan Juli&#xe1;n Merelo</ext-link>, University of Granada, Spain</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Antonio Fernandez Anta, <email>antonio.fernandez@imdea.org</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Human-Media Interaction, a section of the journal Frontiers in Computer Science</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>08</day>
<month>06</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>3</volume>
<elocation-id>641237</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>12</month>
<year>2020</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>05</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2021 Baquero, Casari, Fernandez Anta, Garc&#xed;a-Garc&#xed;a, Frey, Garcia-Agundez, Georgiou, Girault, Ortega, Goessens, Hern&#xe1;ndez-Roig, Nicolaou, Stavrakis, Ojo, Roberts and Sanchez.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Baquero, Casari, Fernandez Anta, Garc&#xed;a-Garc&#xed;a, Frey, Garcia-Agundez, Georgiou, Girault, Ortega, Goessens, Hern&#xe1;ndez-Roig, Nicolaou, Stavrakis, Ojo, Roberts and Sanchez</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<abstract>
<p>CoronaSurveys is an ongoing interdisciplinary project developing a system to infer the incidence of COVID-19 around the world using anonymous open surveys. The surveys have been translated into 60 languages and are continuously collecting participant responses from any country in the world. The responses collected are pre-processed, organized, and stored in a version-controlled repository, which is publicly available to the scientific community. In addition, the CoronaSurveys team has devised several estimates computed on the basis of survey responses and other data, and makes them available on the project&#x2019;s website in the form of tables, as well as interactive plots and maps. In this paper, we describe the computational system developed for the CoronaSurveys project. The system includes multiple components and processes, including the web survey, the mobile apps, the cleaning and aggregation process of the survey responses, the process of storage and publication of the data, the processing of the data and the computation of estimates, and the visualization of the results. In this paper we describe the system architecture and the major challenges we faced in designing and deploying&#x20;it.</p>
</abstract>
<kwd-group>
<kwd>COVID-19</kwd>
<kwd>monitoring</kwd>
<kwd>survey</kwd>
<kwd>indirect reporting</kwd>
<kwd>visualization</kwd>
<kwd>network scale-up method</kwd>
<kwd>mobile app</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>During the current coronavirus pandemic, monitoring the evolution of COVID-19 cases is of utmost importance for the authorities to make informed policy decisions (e.g., lock-downs), and to raise awareness in the general public for taking appropriate public health measures.</p>
<p>At the time of the pandemic outbreak, a lack of laboratory tests, materials, and human resources implied that the evolution of officially confirmed cases did not represent the total number of cases (<xref ref-type="bibr" rid="B13">Ruppert et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B10">Maxmen, 2020</xref>). Even now, there are significant differences across countries in terms of the availability of tests. For this reason, given the rapid progression of the pandemic, in some cases health authorities are forced to make important decisions based on sub-optimal data. Consequently, alternatives to testing that can be rapidly deployed are likely to help authorities, as well as the general population, to better understand the progress of a pandemic (<xref ref-type="bibr" rid="B16">Yang et&#x20;al., 2012</xref>), particularly at its early stages or in low-income countries, where massive testing is unfeasible.</p>
<p>To this end, we have created a system, named <italic>CoronaSurveys</italic>
<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref> to estimate the number of COVID-19 cases based on crowd-sourced open anonymous surveys. CoronaSurveys has been operating since March 2020, starting with only three countries (Spain, Portugal and Cyprus) and currently offering surveys for all countries around the&#x20;globe.</p>
<p>CoronaSurveys uses the <italic>network scale-up</italic> method (NSUM) (<xref ref-type="bibr" rid="B14">Russell Bernard et&#x20;al., 1991</xref>; <xref ref-type="bibr" rid="B1">Bernard et&#x20;al., 2010</xref>), which implements indirect reporting to: 1) reach a wider coverage in a shorter time frame, 2) obtain estimates that converge faster to the true value, and 3) preserve the privacy of the participants. The individual responses act as snapshots of knowledge of the current situation of the pandemic from a personal point of view. When these responses are analyzed collectively, across time and geographic locations, a combined view of the pandemic can be inferred. To the best of our knowledge, this is the largest scale NSUM system ever deployed and the only one to be collecting data continuously over a period of over a year using open surveys.</p>
<p>In this paper, we present the main components of the current CoronaSurveys infrastructure, including the collection, processing and visualization methods used. The computational system powering CoronaSurveys has been designed as the aggregation of loosely coupled components that can be replaced and modified almost independently (see architecture in <xref ref-type="fig" rid="F1">Figure&#x20;1</xref>). This has enabled the system to continuously adapt to the evolution of the COVID-19 pandemic with relatively low effort, demonstrating its extensibility, re-usability, and potential to be used for tracking future pandemic outbreaks.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>CoronaSurveys computational system architecture.</p>
</caption>
<graphic xlink:href="fcomp-03-641237-g001.tif"/>
</fig>
</sec>
<sec id="s2">
<title>2 Data Collection</title>
<p>The data collection subsystem consists of 1) a user-centered web and mobile front-end interface, providing straightforward and intuitive access to the surveys, and 2) a data collection back-end enabling response aggregation in a consistent and structured format to facilitate post-processing.</p>
<sec id="s2-1">
<title>2.1 Front-end: Survey Design</title>
<p>Usability, interaction, and user interfacing play key roles in the initial engagement and subsequent retention of participants. To this end, we pay attention to two main elements: 1) the appearance and usability of the front-end solutions, and 2) the contents and length of the survey.</p>
<p>The web and mobile survey applications have been designed to have minimal loading times, with lightweight graphical elements, a color scheme and page layout suitable for all users, including visually impaired participants and participants in geographic locations where internet speeds may be poor (see <xref ref-type="fig" rid="F2">Figure&#x20;2</xref>). For instance, a tailor-made cache system has been built and deployed to minimize the survey loading time. Similarly, in order to improve accessibility and user experience, the initial website was migrated from GitHub Pages to a WordPress deployment on a server managed by the project&#x20;team.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Snapshots of the CoronaSurveys app. They show the main app screen (left), the information about the project shown when accessing the survey (center), and the survey questions (right).</p>
</caption>
<graphic xlink:href="fcomp-03-641237-g002.tif"/>
</fig>
<p>To preserve user engagement, minimize participant fatigue, and ensure a steady flow of responses we initially designed a minimal survey consisting of two simple questions:<list list-type="simple">
<list-item>
<p>&#x2003;1. <italic>How many people do you know personally in this geographical area? Include only those whose health status you are likely to be aware of</italic> (The geographical area was previously selected, see <xref ref-type="fig" rid="F2">Figure&#x20;2</xref>.)</p>
</list-item>
<list-item>
<p>&#x2003;2. <italic>How many of those were diagnosed with or have symptoms of COVID-19?</italic>
</p>
</list-item>
</list>
</p>
<p>We denote the reply to the first question as the <italic>Reach,</italic> <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and the reply to the second question as the <italic>Number of Cases,</italic> <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. In this way, the aggregated value <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:munder>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>i</mml:mi>
</mml:munder>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> provides a rough estimate of the incidence of COVID-19. The simplicity of the survey, together with the increased interest of people in the initial stages of the pandemic, led to successful initial survey deployments (e.g., 200 responses per week in Spain, 800 responses on the first day in Cyprus, and more than 1,000 in Ukraine). Despite their simplicity, these two questions were sufficient for producing rough preliminary estimates of the cumulative incidence of COVID-19 in several countries, in a period in which testing was scarce.</p>
<p>As CoronaSurveys expanded its reach, additional questions were introduced to improve granularity and estimate more parameters of the pandemic (like fatalities), while maintaining the survey completion time at around 1&#xa0;min. Currently, the survey also includes the following questions:<list list-type="simple">
<list-item>
<p>&#x2003;3. Of the people with symptoms, how many are still sick?</p>
</list-item>
<list-item>
<p>&#x2003;4. How many started with symptoms in the latest 7&#xa0;days?</p>
</list-item>
<list-item>
<p>&#x2003;5. How many passed away?</p>
</list-item>
</list>
</p>
<p>By including these additional questions, we are able to track the number of active cases (Question 3), new cases (Question 4), and the cumulative number of fatalities (Question&#x20;5).</p>
</sec>
<sec id="s2-2">
<title>2.2 Data Aggregation</title>
<p>The back-end data collection engine was designed to provide seamless aggregation of the data in a consistent and structured format. Timeliness, consistency, and proper dissemination of the data were the three main pillars of the aggregation process. CoronaSurveys updates its estimates daily to provide a comparison with the estimates of officially confirmed cases, which are also updated once per day. This daily aggregation also serves as a privacy preserving measure, as we discuss in the next section.</p>
<p>During aggregation, survey responses are classified by country and stored in individual files named as <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<italic>-aggregate.csv</italic>, where <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the two-letter ISO code of the country. Each row in the file corresponds to a single response and is composed of the elements that appear in <xref ref-type="table" rid="T1">Table&#x20;1</xref>: the date of the response, the country for which the response reports, the country ISO code, the region in the country for which the response reports (if any), the region ISO code, the language used to fill in the survey, the answers to the survey questions (<inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mi>a</mml:mi>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mi>a</mml:mi>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), a cookie that anonymously identifies a participant, and a campaign field that can be used to identify responses that correspond to specific survey dissemination campaigns. The aggregated data is then provided to the estimation engine and published in an online public repository (<xref ref-type="bibr" rid="B6">GCGImdea/coronasurveys, 2020</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Aggregation row format.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Date</td>
<td align="center">Country</td>
<td align="center">C-ISO</td>
<td align="center">Region</td>
<td align="center">R-ISO</td>
<td align="center">Lang</td>
<td align="center">
<inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mi>a</mml:mi>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">&#x2026;</td>
<td align="center">
<inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mi>a</mml:mi>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">Cookie</td>
<td align="center">Campaign</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-3">
<title>2.3 User Privacy</title>
<p>Ensuring anonymity and privacy is important to minimize participants&#x2019; reservations about filling in the survey. Ideally, we would like to acquire as much relevant data as possible (e.g., geolocation), but this is at odds with anonymity and is likely to lead to fewer responses. CoronaSurveys implements four anonymity strategies:</p>
<sec id="s2-3-1">
<title>2.3.1 Avoid Third Party Tracking</title>
<p>One of the initial concerns was to eliminate the possibility of a third party collecting data from participants. Although the surveys were first deployed on Google Forms, we quickly moved them to a self-hosted instance of the open-source tool LimeSurvey (<xref ref-type="bibr" rid="B9">LimeSurvey Project Team/Carsten Schmitz, 2012</xref>) to minimize this&#x20;risk.</p>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Avoid Revealing User Identity</title>
<p>CoronaSurveys does not ask any personal questions, and only collects data about the contacts of the participant. The data collected from each participant is limited to the day in which the survey was completed, the geographical region for which the user wishes to provide information, and the replies to the aforementioned questions.</p>
</sec>
<sec id="s2-3-3">
<title>2.3.3 Secure User Identification</title>
<p>Identifying users who return to the CoronaSurveys system, while preserving their anonymity, is necessary to prevent malicious and repetitive responses that can skew our input data. Given our goal to avoid storing personal information, creating personal accounts was not possible, and instead we decided to create a random cookie in the participant&#x2019;s browser, or device, to provide an identifier for the user, and to store it along with the time the survey was last filled in. The cookie is stored in an encrypted form. This cookie can help us detect some duplicate responses and some malicious attacks (anonymous duplication), but does not ensure security. For example, a user could submit their responses from multiple devices, and each would be associated with a different cookie. To remove further malicious responses, we implement outlier detection algorithms described in <xref ref-type="sec" rid="s3">Section&#x20;3</xref>.</p>
</sec>
<sec id="s2-3-4">
<title>2.3.4 Protecting User Identity</title>
<p>Tracking the time when a user submits a response may allow an adversary to recover their true identity. For this reason, we 1) do not include the time of day in the aggregated and published data, and 2) shuffle the responses of a single day, preventing an adversary from extracting the order in which responses were received.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<title>3 Data Analysis</title>
<p>Based on the aggregated, anonymous data, CoronaSurveys employs several methods to produce estimates of the number of COVID-19 cases in all geographical areas for which sufficient data are available, comparing these estimates with those provided by the official authorities. The estimation methods are:<list list-type="simple">
<list-item>
<p>&#x2022; <bold>cCFR-based:</bold> This method is based on estimating the corrected case fatality ratio (cCFR), from the official numbers of cumulative cases and fatalities, and taking an estimation of the approximate number of cases with known outcomes into consideration. It is also assumed that a reliable value of the traditional case fatality ratio (<inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mtext>&#x2a;</mml:mtext>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>) is available (We use <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mtext>&#x2a;</mml:mtext>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1.38</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> with a <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:mn>95</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> confidence interval of <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:mn>1.23</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:mn>1.53</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>, as described in (<xref ref-type="bibr" rid="B15">Verity et&#x20;al., 2020</xref>).) Then, the number of cases is estimated by multiplying the official figure of cumulative cases in a region <italic>D</italic> by the ratio <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>D</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mtext>&#x2a;</mml:mtext>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>D</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the cCFR estimated for&#x20;<italic>D.</italic>
</p>
</list-item>
<list-item>
<p>&#x2022; <bold>cCFR-fatalities:</bold> This method divides the official number of fatalities on a given day&#xa0;<italic>d</italic> by <inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>F</mml:mi>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mtext>&#x2a;</mml:mtext>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and assigns the resulting number of cases to day <inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (<italic>P</italic> is the median number of days from symptom onset to death). We use <inline-formula id="inf18">
<mml:math id="m18">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>13</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, following the values reported by the Centers for Disease Control and Prevention (<xref ref-type="bibr" rid="B2">Centers for Disease Control and Prevention, 2021a</xref>).</p>
</list-item>
<list-item>
<p>&#x2022; <bold>UMD-Symptom-Survey:</bold> This method uses the responses to direct questions about symptoms from the University of Maryland COVID-19 World Survey (<xref ref-type="bibr" rid="B4">Fan et&#x20;al., 2020</xref>) to estimate active cases. In particular, it counts the number of responses that declare fever, and cough or difficulty breathing. This survey collects more than <inline-formula id="inf19">
<mml:math id="m19">
<mml:mrow>
<mml:mn>100,000</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> individual responses&#x20;daily.</p>
</list-item>
<list-item>
<p>&#x2022; <bold>UMD-Symptom-Survey-Indirect:</bold> This method estimates active cases applying the NSUM method to the responses of an indirect question from the University of Maryland COVID-19 World Survey (<xref ref-type="bibr" rid="B4">Fan et&#x20;al., 2020</xref>). In this estimation method the <italic>Reach</italic> is obtained from the CoronaSurveys data, while the <italic>Number of Cases</italic> are the cases reported by answering YES to the question 1) &#x201c;Do you personally know anyone in your local community who is sick with a fever and either a cough or difficulty breathing?&#x201d; and answering the question 2) &#x201c;How many people do you know with these symptoms?&#x201d;</p>
</list-item>
<list-item>
<p>&#x2022; <bold>300Responses:</bold> This method uses a weighted average of 300 filtered CoronaSurveys responses for a given geographical area. Filtering consists in discarding answers that report an unusually large reach (entries larger than 1.5&#x20;times the interquartile range above the upper quartile) or an unusually large number of cases (over <inline-formula id="inf20">
<mml:math id="m20">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> of cases in the reach).</p>
</list-item>
<list-item>
<p>&#x2022; <bold>Estimates-W:</bold> This method uses a weighted average of CoronaSurveys responses from the last <italic>W</italic> days, using the same filtering criteria as <italic>300Responses.</italic>
</p>
</list-item>
</list>
</p>
<p>Cookies allow us to make sure we only count the latest answer for each respondent in each aggregated batch (set of 300 responses for <italic>300Responses</italic>, or last <italic>W</italic> days for <italic>Estimates-W</italic>).</p>
<p>The estimates obtained with the above methods are stored in the online public repository. Each method <italic>M</italic> stores the estimates in a folder named <italic>estimates-M/PlotData</italic>, and the estimates for each country <inline-formula id="inf21">
<mml:math id="m21">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are stored in the file <inline-formula id="inf22">
<mml:math id="m22">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
<italic>-estimate.csv</italic> in the format shown in <xref ref-type="table" rid="T2">Table&#x20;2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Estimates row format.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
<th align="left"/>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Date</td>
<td align="center">Population</td>
<td align="center">
<inline-formula id="inf23">
<mml:math id="m23">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>s</mml:mi>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">&#x2026;</td>
<td align="center">
<inline-formula id="inf24">
<mml:math id="m24">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>s</mml:mi>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Once in the predefined format, the estimates are imported in a time-series database, from which we generate visualizations. Time-series databases are often used to store streaming data organized in <italic>measurements</italic>. For CoronaSurveys, each series of estimates obtained with a given method is one such measurement, while the date, the country, the region, and the population, are characteristics of the measurements, facilitating the localization of the estimates.</p>
</sec>
<sec id="s4">
<title>4 Data Visualization</title>
<p>Finally, converting the computed data to meaningful visualizations is essential to observe trends, insights, and behaviors from our data, as well as to communicate our outcomes to a wider audience. Our visualization engine employs the Grafana (<xref ref-type="bibr" rid="B7">Grafana Labs, 2018</xref>) framework, which enables the creation of interactive plots of various types. We can group our plots into three categories, based on the information they provide:<list list-type="simple">
<list-item>
<p>&#x2022; CoronaSurveys participation statistics</p>
</list-item>
<list-item>
<p>&#x2022; Global-scale visualizations</p>
</list-item>
<list-item>
<p>&#x2022; Local-scale visualizations</p>
</list-item>
</list>
</p>
<p>To better map the effects of the pandemic and in order to capture a holistic view of its impact, we present the computed estimates in both global and countrywide (local) visualizations. Global visualizations intend to expose the distribution of the pandemic around the globe, and identify areas with higher infection rates. Countrywide visualizations aim to pinpoint the estimated magnitude of the problem compared to officially reported&#x20;cases.</p>
<sec id="s4-1">
<title>4.1 CoronaSurveys Participation Statistics</title>
<p>
<xref ref-type="fig" rid="F3">Figure&#x20;3</xref> depicts the statistics of CoronaSurveys participation. In just over 12&#xa0;months, CoronaSurveys has collected data from roughly <inline-formula id="inf25">
<mml:math id="m25">
<mml:mrow>
<mml:mn>25,000</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> participants worldwide, with Spain being the country with the most responses. This means that the absolute reach in Spain is significantly higher. However, the country with largest relative reach with respect to the population is Cyprus, with almost <inline-formula id="inf26">
<mml:math id="m26">
<mml:mrow>
<mml:mn>1,300</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> responses, an absolute reach of more than <inline-formula id="inf27">
<mml:math id="m27">
<mml:mrow>
<mml:mn>30,000</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, and a population of roughly 1 million. This figure also reflects the success of indirect reporting: with this method, we obtain the information of more than 50&#x20;times the number of survey responses, more than a million persons in&#x20;total.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>CoronaSurveys statistics of participation and&#x20;reach.</p>
</caption>
<graphic xlink:href="fcomp-03-641237-g003.tif"/>
</fig>
</sec>
<sec id="s4-2">
<title>4.2&#x20;Global-Scale Visualizations</title>
<p>Our goal for the global visualisations is twofold: 1) to provide a snapshot of the pandemic based on the latest computed estimates and 2) to provide a comparative plot exposing the progress of the virus in multiple countries.</p>
<p>A map is one of the most intuitive ways to present an instance of the data on a global scale. Therefore, <xref ref-type="fig" rid="F4">Figure&#x20;4</xref> presents a map visualization that includes the estimates of the percentage of cumulative cases (infected) per country based on the cCFR algorithm (ccfr-based). Bubble points can capture the magnitude of a value by adjusting their color based on a predefined color scale, and their radius relative to the maximum and minimum values on the map. On the top left of the figure there are visible drop-down menus to select other estimators and metrics.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Cumulative number of cases, estimated with the cCFR-based method, around the globe. A larger radius means a higher percentage.</p>
</caption>
<graphic xlink:href="fcomp-03-641237-g004.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="F5">Figure&#x20;5</xref> provides a comparison of the countries most affected by the pandemic. This plot also presents estimates based on the <italic>Estimates-W</italic> algorithm. For clarity, only the lines for United&#x20;Kingdom, Brazil, Portugal, France and Chile are shown in this figure. (Lines can be shown or hidden individually in the website plot.)</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Global estimates of cumulative number of cases obtained with method <italic>Estimates-W</italic>. Only the lines for United&#x20;Kingdom, Brazil, Portugal, France and Chile are shown for clarity.</p>
</caption>
<graphic xlink:href="fcomp-03-641237-g005.tif"/>
</fig>
</sec>
<sec id="s4-3">
<title>4.3&#x20;Local-Scale Visualizations</title>
<p>For local-scale visualization, we display the evolution in the number of active cases, new daily cases, and contagious cases (see <xref ref-type="fig" rid="F6">Figure&#x20;6</xref>), estimated with some of the methods described above. To estimate the number of active and contagious cases when only daily cases are available (e.g., from confirmed data), we assume that cases are active and contagious for 18 and 12&#xa0;days, respectively (<xref ref-type="bibr" rid="B2">Centers for Disease Control and Prevention, 2021a</xref>; <xref ref-type="bibr" rid="B3">Centers for Disease Control and Prevention, 2021b</xref>). Observe in <xref ref-type="fig" rid="F6">Figure&#x20;6</xref> that the ratios of active cases estimated on the last day (April 26th, 2021) with the responses to direct symptom questions (blue line, <inline-formula id="inf28">
<mml:math id="m28">
<mml:mrow>
<mml:mn>4.31</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>) and to the indirect questions using NSUM (purple line, <inline-formula id="inf29">
<mml:math id="m29">
<mml:mrow>
<mml:mn>2.87</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>) are one order of magnitude larger than those obtained with the official number of cases (<inline-formula id="inf30">
<mml:math id="m30">
<mml:mrow>
<mml:mn>0.33</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>) and the official number of fatalities (<inline-formula id="inf31">
<mml:math id="m31">
<mml:mrow>
<mml:mn>0.31</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>). (The reason for the difference between the blue and the purple lines is currently under evaluation.)</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Estimates of number of cases in India. The estimates of active cases obtained with data from the University of Maryland COVID-19 World Survey (<xref ref-type="bibr" rid="B4">Fan et&#x20;al., 2020</xref>) are one order of magnitude higher than those obtained from official&#x20;data.</p>
</caption>
<graphic xlink:href="fcomp-03-641237-g006.tif"/>
</fig>
<p>To illustrate the estimates obtained from the survey we use Portugal, a country for which we obtained a good number of replies (see <xref ref-type="fig" rid="F7">Figure&#x20;7</xref>). Observe the increase in the number of replies in February 2021, when a paid campaign in Facebook Ads was deployed in Portugal. Country-level plots present a comparison of the different estimation methods for cumulative number of cases, including the report of the official authorities. <xref ref-type="fig" rid="F8">Figure&#x20;8</xref> presents the cumulative number of cases estimates in CoronaSurveys for Portugal. The thin green line is the number of cases reported by the official authorities, while the remaining curves present the estimates obtained with <italic>cCFR-based</italic>, <italic>300Responses</italic>, and <italic>Estimates-W</italic>. As can be seen, all curves have similar trends, but <italic>cCFR-based</italic>, <italic>300Responses</italic>, and <italic>Estimates-W</italic> have noticeably larger values than the official&#x20;data.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Survey responses in Portugal. Observe the increase of participation obtained in February 2021 with a paid campaign in Facebook Ads.</p>
</caption>
<graphic xlink:href="fcomp-03-641237-g007.tif"/>
</fig>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Estimates for Portugal.</p>
</caption>
<graphic xlink:href="fcomp-03-641237-g008.tif"/>
</fig>
</sec>
</sec>
<sec id="s5">
<title>5 Results</title>
<p>To test the feasibility of using CoronaSurveys to provide accurate estimates of the number of cases, we conducted a comparison between our estimates and the results of massive serology testing in Spain, a study conducted by Poll&#xe1;n et&#x20;al. (<xref ref-type="bibr" rid="B12">Poll&#xe1;n et&#x20;al., 2020</xref>). In this study (<xref ref-type="bibr" rid="B5">Garc&#xed;a-Agundez et&#x20;al., 2021</xref>), we calculated the correlation between our estimates and the serology results across all regions (autonomous communities) of Spain in the timeframe of the serology study. The serology study recruited <inline-formula id="inf32">
<mml:math id="m32">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>61075</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> participants, which represents <inline-formula id="inf33">
<mml:math id="m33">
<mml:mrow>
<mml:mn>0.1787</mml:mn>
<mml:mtext>%</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mo>&#xb1;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mn>0.0984</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> of the regional population. In contrast, CoronaSurveys data provides information on <inline-formula id="inf34">
<mml:math id="m34">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>67199</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> people through indirect reporting, or <inline-formula id="inf35">
<mml:math id="m35">
<mml:mrow>
<mml:mn>0.1827</mml:mn>
<mml:mtext>%</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mo>&#xb1;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mn>0.0701</mml:mn>
<mml:mtext>%</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> of the regional population.</p>
<p>This resulted in a Pearson R squared correlation of 0.89. In addition, we observed that CoronaSurveys systematically underestimates the number of cases by a factor of 46%, possibly due to asymptomatic cases. This ratio is consistent with other survey implementations that used direct reporting instead (<xref ref-type="bibr" rid="B11">Oliver et&#x20;al., 2020</xref>).</p>
<p>Although further comparisons in other countries are necessary once we have sufficient data and similar serology studies are available, we believe this strongly supports the use of open surveys as an additional source of information to track the progress of pandemics.</p>
</sec>
<sec id="s6">
<title>6 Conclusion</title>
<p>In this article, we present the system architecture and estimation methods of CoronaSurveys, which uses open surveys to monitor the progress of the COVID-19 pandemic. Our graphical estimations require large amounts of data from active participants, but provide insightful depictions of the progress of the pandemic in different regions, offering an estimation of the cumulative and active number of cases in different geographical&#x20;areas.</p>
<p>The most important challenge and limitation of CoronaSurveys is the number of survey responses. In this sense, the dissemination of our graphical estimations is important to maximize user engagement and retention. For this reason, in the future we aim to include a forecast of the number of cases and fatalities based on recent data for different geographical areas, in order to empower the dissemination of our graphical visualizations and with it increase user recruitment.</p>
<p>In addition, our outlier detection methods are heuristic and could, in the future, be improved to be more resilient to malicious responses. CoronaSurveys is a work in progress, and features such as the number of responses per day could be implemented to detect certain types of malicious attacks which open online surveys may be subjected&#x20;to.</p>
<p>Our first evaluation, comparing the results of CoronaSurveys with a serology study in Spain, provided excellent results, supporting open surveys and indirect reporting as potential sources of information to track pandemics, although further comparisons in different regions are required. An interesting topic of discussion would be the minimum number of responses required to provide reasonably accurate estimates, as an increasing number of replies will balance out individual inaccuracies of over- or underestimation and improve the functionality of our outlier detection methods, following the &#x201c;wisdom of the crowd&#x201d; phenomenon. Naturally, the minimum number of responses will depend on factors such as population dispersion and cultural differences on behavior, but our initial estimate is that by indirectly providing information for a percentage of the population similar to that of a massive serology study, we can already provide valuable estimates.</p>
<p>In conclusion, massive serology testing is ultimately the standard to accurately estimate the prevalence of COVID-19 in a region. However, this has its limitations, since it requires time until deployment, involves massive resources, and is unfeasible in some scenarios and countries. As an example, in the current outbreak in India as of April 2021, the level of underreporting is likely to be very high (<xref ref-type="bibr" rid="B8">Institute for Health Metrics and Evaluation, 2021</xref>), which matches what is observed in <xref ref-type="fig" rid="F6">Figure&#x20;6</xref>. In these scenarios, we believe indirect reporting can provide a viable alternative to obtain early approximations of prevalence. Although CoronaSurveys is a work in progress and much fine tuning is still required, we believe it provides a proof of concept of indirect reporting, as well as early results on its feasibility.</p>
</sec>
</body>
<back>
<sec id="s7">
<title>Data Availability Statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="https://github.com/GCGImdea/coronasurveys/">https://github.com/GCGImdea/coronasurveys/</ext-link>.</p>
</sec>
<sec id="s8">
<title>Ethics Statement</title>
<p>The studies involving human participants were reviewed and approved by the Ethics Committee of IMDEA Networks Institute. The patients/participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="s9">
<title>Author Contributions</title>
<p>All authors listed have made a substantial, direct, and intellectual contribution to the work and approved it for publication.</p>
</sec>
<sec id="s10">
<title>Funding</title>
<p>Partially supported by grant SOLID from Fundaci&#xf3;n IMDEA Networks, and CoronaSurveys-CM from IMDEA Networks Institute and Comunidad de Madrid.</p>
</sec>
<sec sec-type="COI-statement" id="s11">
<title>Conflict of Interest</title>
<p>MG was employed by Consulting. NN and ES were employed by Algolysis Ltd. JR was employed by Skyhaven Media. IS was employed by InqBarna.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>Accessible at <ext-link ext-link-type="uri" xlink:href="https://www.coronasurveys.org">https://www.coronasurveys.org</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bernard</surname>
<given-names>H. R.</given-names>
</name>
<name>
<surname>Hallett</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Iovita</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Johnsen</surname>
<given-names>E. C.</given-names>
</name>
<name>
<surname>Lyerla</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>McCarty</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Counting Hard-To-Count Populations: the Network Scale-Up Method for Public Health</article-title>. <source>Sex. Transm. Infections</source> <volume>86</volume> (<issue>Suppl. 2</issue>), <fpage>ii11</fpage>&#x2013;<lpage>ii15</lpage>. <pub-id pub-id-type="doi">10.1136/sti.2010.044446</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="web">
<collab>Centers for Disease Control and Prevention</collab> (<year>2021a</year>). <article-title>Covid-19 Pandemic Planning Scenarios</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/coronavirus/2019-ncov/hcp/planning-scenarios.html">https://www.cdc.gov/coronavirus/2019-ncov/hcp/planning-scenarios.html</ext-link>
</comment> (<comment>Accessed December 12, 2020</comment>). </citation>
</ref>
<ref id="B3">
<citation citation-type="web">
<collab>Centers for Disease Control and Prevention</collab> (<year>2021b</year>). <article-title>Clinical Questions about Covid-19: Questions and Answers</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/coronavirus/2019-ncov/hcp/faq.html">https://www.cdc.gov/coronavirus/2019-ncov/hcp/faq.html</ext-link>
</comment> (<comment>Accessed May 09, 2021</comment>). </citation>
</ref>
<ref id="B4">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Stewart</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kommareddy</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Bradford</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Covid-19 World Symptom Survey Data Api</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://covidmap.umd.edu/api.html">https://covidmap.umd.edu/api.html</ext-link>
</comment> (<comment>Accessed May 28, 2021</comment>). </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garc&#xed;a-Agundez</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ojo</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Hern&#xe1;ndez-Roig</surname>
<given-names>H. A.</given-names>
</name>
<name>
<surname>Baquero</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Frey</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Georgiou</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Estimating the COVID-19 Prevalence in Spain with Indirect Reporting via Open Surveys</article-title>. <source>Front. Public Health</source> <volume>9</volume>. Available at: <ext-link ext-link-type="uri" xlink:href="https://www.medrxiv.org/content/10.1101/2021.01.29.20248125v1">https://www.medrxiv.org/content/10.1101/2021.01.29.20248125v1</ext-link> (<comment>Accessed May 28, 2021</comment>). </citation>
</ref>
<ref id="B6">
<citation citation-type="web">
<collab>GCGImdea/coronasurveys</collab> (<year>2020</year>). <article-title>Coronasurveys Data Repository</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/GCGImdea/coronasurveys">https://github.com/GCGImdea/coronasurveys</ext-link>
</comment> (<comment>Accessed November 5, 2020</comment>). </citation>
</ref>
<ref id="B7">
<citation citation-type="web">
<collab>Grafana Labs</collab> (<year>2018</year>). <article-title>Grafana Documentation</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://grafana.com/docs/">https://grafana.com/docs/</ext-link>
</comment> (<comment>Accessed May 28, 2021</comment>). </citation>
</ref>
<ref id="B8">
<citation citation-type="web">
<collab>Institute for Health Metrics and Evaluation</collab> (<year>2021</year>). <article-title>Covid-19 Results Briefing in India</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://www.healthdata.org/sites/default/files/files/Projects/COVID/2021/163_briefing_India_9.pdf">http://www.healthdata.org/sites/default/files/files/Projects/COVID/2021/163_briefing_India_9.pdf</ext-link>
</comment> (<comment>Accessed May 03, 2021</comment>). </citation>
</ref>
<ref id="B9">
<citation citation-type="book">
<collab>LimeSurvey Project Team/Carsten Schmitz</collab> (<year>2012</year>). <source>LimeSurvey: An Open Source Survey Tool</source>. <publisher-loc>Hamburg, Germany</publisher-loc>: <publisher-name>LimeSurvey Project</publisher-name>.</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maxmen</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>How Much Is Coronavirus Spreading under the Radar?</article-title>. <source>Nature</source> <volume>10</volume>. <pub-id pub-id-type="doi">10.1038/d41586-020-00760-8</pub-id>
<comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.nature.com/articles/d41586-020-00760-8">https://www.nature.com/articles/d41586-020-00760-8</ext-link>
</comment> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oliver</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Barber</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Roomp</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Roomp</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Assessing the Impact of the Covid-19 Pandemic in Spain: Large-Scale, Online, Self-Reported Population Survey</article-title>. <source>J.&#x20;Med. Internet Res.</source> <volume>22</volume> (<issue>9</issue>), <fpage>e21319</fpage>. <pub-id pub-id-type="doi">10.2196/21319</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Poll&#xe1;n</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>P&#xe9;rez-G&#xf3;mez</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Pastor-Barriuso</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Oteo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hern&#xe1;n</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>P&#xe9;rez-Olmeda</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Prevalence of SARS-CoV-2 in Spain (ENE-COVID): a Nationwide, Population-Based Seroepidemiological Study</article-title>. <source>The Lancet</source> <volume>396</volume> (<issue>10250</issue>), <fpage>535</fpage>&#x2013;<lpage>544</lpage>. <pub-id pub-id-type="doi">10.1016/s0140-6736(20)32266-2</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ruppert</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Gromm&#xe9;</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ustek-Spilda</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Cakici</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Citizen Data and Trust in Official Statistics</article-title>. <source>Economie Statistique/Economics Stat.</source> (<issue>505-506</issue>), <fpage>171</fpage>&#x2013;<lpage>184</lpage>. <pub-id pub-id-type="doi">10.24187/ecostat.2018.505d.1971</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bernard</surname>
<given-names>H. R.</given-names>
</name>
<name>
<surname>Johnsen</surname>
<given-names>E. C.</given-names>
</name>
<name>
<surname>Killworth</surname>
<given-names>P. D.</given-names>
</name>
<name>
<surname>Robinson</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>1991</year>). <article-title>Estimating the Size of an Average Personal Network and of an Event Subpopulation: Some Empirical Results</article-title>. <source>Soc. Sci. Res.</source> <volume>20</volume> (<issue>2</issue>), <fpage>109</fpage>&#x2013;<lpage>121</lpage>. <pub-id pub-id-type="doi">10.1016/0049-089x(91)90012-r</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Verity</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Okell</surname>
<given-names>L. C.</given-names>
</name>
<name>
<surname>Dorigatti</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Winskill</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Whittaker</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Imai</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Estimates of the Severity of Coronavirus Disease 2019: a Model-Based Analysis</article-title>. <source>Lancet Infect. Dis.</source> <volume>20</volume>, <fpage>669</fpage>&#x2013;<lpage>677</lpage>. <pub-id pub-id-type="doi">10.1016/S1473-3099(20)30243-7</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>A Serological Survey of Antibodies to H5, H7 and H9 Avian Influenza Viruses Amongst the Duck-Related Workers in Beijing, China</article-title>. <source>PLoS One</source> <volume>7</volume> (<issue>11</issue>), <fpage>e50770</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0050770</pub-id>
<comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0050770">https://journals.plos.org/plosone/article?id&#x3d;10.1371/journal.pone.0050770</ext-link>
</comment> </citation>
</ref>
</ref-list>
</back>
</article>