<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<?covid-19-tdm?>
<article article-type="research-article" dtd-version="2.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Sig. Proc.</journal-id>
<journal-title>Frontiers in Signal Processing</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Sig. Proc.</abbrev-journal-title>
<issn pub-type="epub">2673-8198</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">986293</article-id>
<article-id pub-id-type="doi">10.3389/frsip.2022.986293</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Signal Processing</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>COVID-19 respiratory sound analysis and classification using audio textures</article-title>
<alt-title alt-title-type="left-running-head">Silva et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frsip.2022.986293">10.3389/frsip.2022.986293</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Silva</surname>
<given-names>Leticia</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1796992/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Valad&#xe3;o</surname>
<given-names>Carlos</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lampier</surname>
<given-names>Lucas</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Delisle-Rodr&#xed;guez</surname>
<given-names>Denis</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1255694/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Caldeira</surname>
<given-names>Eliete</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bastos-Filho</surname>
<given-names>Teodiano</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Krishnan</surname>
<given-names>Sridhar</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1166993/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Electrical Engineering</institution>, <institution>Universidade Federal do Espirito Santo</institution>, <addr-line>Vit&#x00F3;ria</addr-line>, <country>Brazil</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Postgraduate Program in Electrical Engineering</institution>, <institution>Universidade Federal do Espirito Santo</institution>, <addr-line>Vit&#xf3;ria</addr-line>, <country>Brazil</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Signal Analysis Research Group, Department of Electrical, Computer and Biomedical Engineering</institution>, <institution>Toronto Metropolitan University</institution>, <addr-line>Toronto</addr-line>, <addr-line>ON</addr-line>, <country>Canada</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Edmond and Lily Safra International Institute of Neurosciences</institution>, <institution>Santos Dumont Institute</institution>, <addr-line>Macaiba</addr-line>, <country>Brazil</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1666192/overview">Shishir Maheshwari</ext-link>, Thapar Institute of Engineering &#x26; Technology, India</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1450643/overview">Ketan Kotecha</ext-link>, Symbiosis International University, India</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1885533/overview">Priya E</ext-link>., Sri Sairam Engineering College, India</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Leticia Silva, <email>leticia.silva.29@edu.ufes.br</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Biomedical Signal Processing, a section of the journal Frontiers in Signal Processing</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>05</day>
<month>10</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>2</volume>
<elocation-id>986293</elocation-id>
<history>
<date date-type="received">
<day>04</day>
<month>07</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>09</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Silva, Valad&#xe3;o, Lampier, Delisle-Rodr&#xed;guez, Caldeira, Bastos-Filho and Krishnan.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Silva, Valad&#xe3;o, Lampier, Delisle-Rodr&#xed;guez, Caldeira, Bastos-Filho and Krishnan</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Since the COVID-19 outbreak, a major scientific effort has been made by researchers and companies worldwide to develop a digital diagnostic tool to screen this disease through some biomedical signals, such as cough, and speech. Joint time&#x2013;frequency feature extraction techniques and machine learning (ML)-based models have been widely explored in respiratory diseases such as influenza, pertussis, and COVID-19 to find biomarkers from human respiratory system-generated acoustic sounds. In recent years, a variety of techniques for discriminating textures and computationally efficient local texture descriptors have been introduced, such as local binary patterns and local ternary patterns, among others. In this work, we propose an audio texture analysis of sounds emitted by subjects in suspicion of COVID-19 infection using time&#x2013;frequency spectrograms. This approach of the feature extraction method has not been widely used for biomedical sounds, particularly for COVID-19 or respiratory diseases. We hypothesize that this textural sound analysis based on local binary patterns and local ternary patterns enables us to obtain a better classification model by discriminating both people with COVID-19 and healthy subjects. Cough, speech, and breath sounds from the INTERSPEECH 2021 ComParE and Cambridge KDD databases have been processed and analyzed to evaluate our proposed feature extraction method with ML techniques in order to distinguish between positive or negative for COVID-19 sounds. The results have been evaluated in terms of an unweighted average recall (UAR). The results show that the proposed method has performed well for cough, speech, and breath sound classification, with a UAR up to 100.00%, 60.67%, and 95.00%, respectively, to infer COVID-19 infection, which serves as an effective tool to perform a preliminary screening of COVID-19.</p>
</abstract>
<kwd-group>
<kwd>COVID-19</kwd>
<kwd>local binary pattern</kwd>
<kwd>local ternary pattern</kwd>
<kwd>speech</kwd>
<kwd>cough</kwd>
<kwd>breath</kwd>
<kwd>machine learning</kwd>
<kwd>spectrogram</kwd>
</kwd-group>
<contract-sponsor id="cn001">Natural Sciences and Engineering Research Council of Canada<named-content content-type="fundref-id">10.13039/501100000038</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>COVID-19 (coronavirus disease 2019) is a contagious infectious disease caused by the new SARS-CoV-2 (severe acute respiratory syndrome coronavirus 2) virus, which was declared a global pandemic on 11 March 2020, by the World Health Organization (WHO) (<xref ref-type="bibr" rid="B35">World Health Organization, 2020</xref>). According to information provided by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University (JHU), accessed on 31 July 2022, almost 548 million people have been infected and more than 6.4 million people have died in the last two years. In Brazil, more than 33 million cases have been reported (with more than 678,000 deaths), and in Canada, more than 4 million cases have been reported (and almost 43,000 deaths) (Johns <xref ref-type="bibr" rid="B10">Hopkins University, 2022</xref>).</p>
<p>Clinical manifestations of COVID-19 infection vary from asymptomatic to symptomatic. This infection affects the respiratory system and includes symptoms such as fever, dry cough, dyspnea, headache, sputum production, hemoptysis, myalgia, fatigue, nausea, vomiting, diarrhea, abdominal pain, and loss of smell and taste (<xref ref-type="bibr" rid="B15">Oliveira et al., 2020</xref>; <xref ref-type="bibr" rid="B21">Rai et al., 2021</xref>). About 25% of patients with mild-to-moderate COVID-19 have been found to have dysphonia since multiple body structures are linked to human voice generation, such as the lungs, vocal folds, and laryngeal muscle (<xref ref-type="bibr" rid="B31">Suppakitjanusant et al., 2021</xref>). Thus, respiratory sounds such as cough, speech, and breath may be an important biomarker for COVID-19 diagnosis.</p>
<p>Audio signals generated by body structures and organs have been widely explored for diagnosis and monitoring of diseases by clinicians and clinical researchers (<xref ref-type="bibr" rid="B4">Brown et al., 2020</xref>). In <xref ref-type="bibr" rid="B20">Pramono et al. (2016</xref>), a pertussis identification algorithm is proposed by using cough and whoop sounds. They extracted several features, such as Mel-frequency cepstral coefficients (MFCCs) and the zero crossing rate (ZCR), and further used a logistic regression (LR) model-based classifier. <xref ref-type="bibr" rid="B16">Pahar et al. (2021</xref>) utilized cough sounds produced by patients with tuberculosis and other lung ailments to distinguish both diseases by MFCCs and the ZCR.</p>
<p>In the same direction, various research studies employing respiratory sounds were conducted for COVID-19 screening (<xref ref-type="bibr" rid="B4">Brown et al., 2020</xref>; <xref ref-type="bibr" rid="B5">Casanova et al., 2021</xref>; <xref ref-type="bibr" rid="B23">Schuller et al., 2021</xref>; <xref ref-type="bibr" rid="B33">Verde et al., 2021</xref>; <xref ref-type="bibr" rid="B17">Pahar et al., 2022</xref>; <xref ref-type="bibr" rid="B19">Pleva et al., 2022</xref>; <xref ref-type="bibr" rid="B26">Sharma et al., 2022</xref>; <xref ref-type="bibr" rid="B34">Villa-Parra et al., 2022</xref>). Cough and breathing sounds from COVID-19, asthmatic, and healthy individuals were utilized by <xref ref-type="bibr" rid="B4">Brown et al. (2020</xref>). A total of 733 dimensional features were extracted by several methods, including MFCCs and the ZCR, which were reduced afterward by principal component analysis (PCA) and tested in classifiers such as LR, gradient boosting trees, and support vector machines (SVMs). In <xref ref-type="bibr" rid="B33">Verde et al. (2021</xref>), only voice alterations due to COVID-19 infection were estimated by the main discriminant features used in clinical practice to assess the voice quality and for voice classification, such as MFCCs and spectral roll-off (SR). Experimental research is presented by <xref ref-type="bibr" rid="B17">Pahar et al. (2022</xref>) to detect COVID-19 with cough, breath, and speech sounds by using deep transfer learning and bottleneck features, employing the convolution neural network (CNN), long short-term memory network (LSTM), and ResNet50 architecture for classification. On the other hand, in <xref ref-type="bibr" rid="B26">Sharma et al. (2022</xref>), a model from textural features and the k-nearest neighbor (KNN) classifier was utilized in COVID-19 screening.</p>
<p>In this work, we propose a texture-based method for COVID-19 respiratory sound classification, which captures texture-related information from its spectrogram representation. We hypothesize that using texture-based features may increase class discrimination with low computational complexity. Although texture-based approaches are widely used in image classification, a lack of research using them in biomedical signal processing is observed. It is to be noted that the various aforementioned research studies utilized acoustic (i.e., ZCR) or deep features (i.e., deep transfer learning) (<xref ref-type="bibr" rid="B26">Sharma et al., 2022</xref>). Our study utilized cough, speech, and breath sounds from three different databases to evaluate our proposed methodology. The textural information is captured by employing local binary patterns and local ternary patterns.</p>
<p>This study is structured into four sections as follows. <xref ref-type="sec" rid="s2">Section 2</xref> describes the dataset, followed by the proposed system to detect the presence of SARS-CoV-2 through cough, speech, and breath analysis. Afterward, the results and discussions are presented in <xref ref-type="sec" rid="s3">Section 3</xref> and <xref ref-type="sec" rid="s4">Section 4</xref>, respectively, in which the performance of our approach is analyzed. Finally, the conclusions about the proposed method are given in <xref ref-type="sec" rid="s5">Section 5</xref>.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<p>As shown in <xref ref-type="fig" rid="F1">Figure 1</xref>, the workflow of our proposed system to infer COVID-19 by cough, speech, and breath sounds is presented. The following sections will detail each step.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Proposed scheme for COVID-19 screening using cough, speech, and breath sounds.</p>
</caption>
<graphic xlink:href="frsip-02-986293-g001.tif"/>
</fig>
<sec id="s2-1">
<title>2.1 Dataset description</title>
<p>The COVID-19 sound dataset from Cambridge University was utilized with mutual agreement for a research purpose. This dataset is approved at Cambridge University, Department of Computer Science and Technology, by following all requisites from the ethics committee. In the following section, the databases are described.</p>
<sec id="s2-1-1">
<title>2.1.1 ComParE 2021 CCS and CSS</title>
<p>In the INTERSPEECH 2021 Computational Paralinguistics Challenge (ComParE), two out of four sub-challenges were used in this work, which are the COVID-19 cough sub-challenge (CCS) and the COVID-19 speech sub-challenge (CSS). For both CCS and CSS, cough sounds and speech recordings with COVID-19 positive/negative audios were used to predict a COVID-19 infection. The &#x201c;COVID-19 Sounds App&#x201d; was used to collect audio data <italic>via</italic> multiple platforms (a webpage, an Android app, and an iOS app). Each participant was requested to provide one to three forced coughs and say <italic>&#x201c;I hope my data can help to manage the virus pandemic&#x201d;</italic> one to three times. Finally, each recording was manually checked, resampled, and converted to 16&#xa0;kHz and mono/16 bit (<xref ref-type="bibr" rid="B23">Schuller et al., 2021</xref>). <xref ref-type="table" rid="T1">Table 1</xref> shows a detailed sample distribution for this dataset. It is to be noted that the number of healthy individuals (labeled as &#x23; negative) is significantly higher than individuals infected with COVID-19, especially for the cough recordings (409 more healthy samples). To compare with the ComParE sub-challenge results, the official challenge partitions for training, validation, and testing were used in our experiments.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Sample distribution of the databases utilized in our experiments with respect to modality, number of COVID-19-positive samples (<italic>&#x23;Positive</italic>), number of COVID-19-negative samples (<italic>&#x23;Negative</italic>), and total number of samples (<italic>&#x23;Positive</italic> &#x2b; <italic>&#x23;Negative</italic>, <italic>&#x23; Total</italic>).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Database</th>
<th align="left">Modality</th>
<th align="left">&#x23;Positive</th>
<th align="left">&#x23;Negative</th>
<th align="left">&#x23;Total</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">CCS</td>
<td align="left">COVID-19 cough</td>
<td align="left">158</td>
<td align="left">567</td>
<td align="left">725</td>
</tr>
<tr>
<td align="left">CSS</td>
<td align="left">COVID-19 speech</td>
<td align="left">308</td>
<td align="left">585</td>
<td align="left">893</td>
</tr>
<tr>
<td rowspan="6" align="left">KDD</td>
<td align="left">COVID-19 cough&#x2a;</td>
<td align="left">46</td>
<td align="left">64</td>
<td align="left">110</td>
</tr>
<tr>
<td align="left">COVID-19 breath&#x2a;</td>
<td align="left">46</td>
<td align="left">64</td>
<td align="left">110</td>
</tr>
<tr>
<td align="left">COVID-19 cough<sup>&#x2020;</sup>
</td>
<td align="left">64</td>
<td align="left">138</td>
<td align="left">202</td>
</tr>
<tr>
<td align="left">COVID-19 breath<sup>&#x2020;</sup>
</td>
<td align="left">64</td>
<td align="left">144</td>
<td align="left">208</td>
</tr>
<tr>
<td align="left">Asthma cough&#x2a;</td>
<td align="left">&#x2014;</td>
<td align="left">104</td>
<td align="left">104</td>
</tr>
<tr>
<td align="left">Asthma breath&#x2a;</td>
<td align="left">&#x2014;</td>
<td align="left">104</td>
<td align="left">104</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x2a; Users who have declared <italic>cough</italic> as a symptom.</p>
</fn>
<fn>
<p>&#x2020; Users who have declared to have a clean medical history, have never smoked, and have no symptoms (<italic>no cough</italic>).</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s2-1-2">
<title>2.1.2 Cambridge KDD</title>
<p>In this crowdsourced dataset, the aforementioned &#x201c;COVID-19 Sounds App&#x201d; was also utilized to record, in addition to cough and speech, breathing sounds. However, as in the study by <xref ref-type="bibr" rid="B4">Brown et al. (2020</xref>), only cough and breathing sounds were used in our study. Five different sets of users were selected from this database: positive COVID-19 with cough as a symptom, positive COVID-19 without cough as a symptom, healthy with cough as a symptom, healthy without cough as a symptom, and asthma with cough as a symptom. Unlike CCS and CSS, the Cambridge KDD database is split between web-based and Android partitions (<xref ref-type="bibr" rid="B4">Brown et al., 2020</xref>). As in the study by <xref ref-type="bibr" rid="B26">Sharma et al. (2022</xref>), in our research, we utilized the subset, which is collected by the Android application only due to the wide prevalence of smartphone users. As for CCS and CSS datasets, the data in Cambridge KDD are unbalanced (see <xref ref-type="table" rid="T1">Table 1</xref>), especially for individuals who declared a clean medical history (nonsmoking and no symptoms).</p>
</sec>
</sec>
<sec id="s2-2">
<title>2.2 Audio image representation</title>
<p>In general, an audio can be represented through a two-dimensional representation by considering the time and amplitude variation. <xref ref-type="fig" rid="F2">Figure 2A</xref> shows an example of cough, speech, and breath sound signals in the time domain, respectively. Audio can also be transformed into a time and frequency representation. The time&#x2013;frequency representation (TFR) of the audio allows us to analyze its embedded data, with spectrograms being the most commonly used (see <xref ref-type="fig" rid="F2">Figure 2B</xref>). A spectrogram represents short time periods of a signal and the power spectrum for different frequency ranges, and it can be visualized through an image for easy interpretation (<xref ref-type="bibr" rid="B26">Sharma et al., 2022</xref>). In our study, we utilized the Mel spectrogram to represent the COVID-19 respiratory sounds (<xref ref-type="bibr" rid="B36">Zhou et al., 2021</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Cough, speech, and breath signal representation. <bold>(A)</bold> shows the signals in the time domain; <bold>(B)</bold> shows the time&#x2013;frequency Mel spectrogram representation.</p>
</caption>
<graphic xlink:href="frsip-02-986293-g002.tif"/>
</fig>
<p>The Mel spectrogram is computed by extracting the coefficients relative to the compositional frequencies with a short-time Fourier transform (STFT). As the human ear does not perceive frequencies on a linear scale (lower frequencies are better to discriminate than higher frequencies), the main idea of the Mel scale is to mimic the non-linear human ear perception (<xref ref-type="bibr" rid="B13">Nanni et al., 2021</xref>; <xref ref-type="bibr" rid="B36">Zhou et al., 2021</xref>). Each frame of the spectrum is passed through a Mel filter bank, and the conversion between Hertz (<italic>f</italic>) and Mel (<italic>m</italic>) can be calculated using <xref ref-type="disp-formula" rid="e1">Eq. 1</xref>.<disp-formula id="e1">
<mml:math id="m1">
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2595</mml:mn>
<mml:mspace width="0.17em"/>
<mml:msub>
<mml:mrow>
<mml:mi>log</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mi>f</mml:mi>
<mml:mn>700</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
</sec>
<sec id="s2-3">
<title>2.3 Audio texture feature extraction</title>
<p>In order to use machine learning techniques for training models to classify cough from COVID-19 and healthy individuals, it is necessary to extract features from the Mel spectrograms. Recently, various research studies using audio textures have shown good results for pathological speech screening (<xref ref-type="bibr" rid="B25">Sharma et al., 2020</xref>) and analysis (<xref ref-type="bibr" rid="B27">Sharma et al., 2021</xref>), COVID-19 respiratory sound analysis (<xref ref-type="bibr" rid="B26">Sharma et al., 2022</xref>), and lung sound classification (<xref ref-type="bibr" rid="B24">Sengupta et al., 2017</xref>). Texture analysis is important in applications, such as face and pattern recognition (<xref ref-type="bibr" rid="B25">Sharma et al., 2020</xref>). In the proposed study, we extract two common texture features from the Mel spectrogram images, local binary patterns, and local ternary patterns, which will be described in detail in the following subsections.</p>
<sec id="s2-3-1">
<title>2.3.1 Local binary pattern</title>
<p>The local binary pattern (LBP) is widely used in image processing for texture analysis because it is a simple and efficient operator to measure the spatial representation of local image texture and gray scale contrast. For instance, it has been used in lung sound classification (<xref ref-type="bibr" rid="B24">Sengupta et al., 2017</xref>), pathological speech screening (<xref ref-type="bibr" rid="B25">Sharma et al., 2020</xref>) and analysis (<xref ref-type="bibr" rid="B27">Sharma et al., 2021</xref>), COVID-19 screening (<xref ref-type="bibr" rid="B26">Sharma et al., 2022</xref>), scene classification (<xref ref-type="bibr" rid="B1">Abidin et al., 2018</xref>), and snore discrimination (<xref ref-type="bibr" rid="B7">Demir et al., 2018</xref>). The original LBP, termed &#x201c;uniform,&#x201d; compares each pixel of a neighborhood (the original LBP considers a region of 3 &#xd7; 3) to the center pixel value. Negative results are encoded with 0 and the other with 1, and its decimal correspondent number is used (<xref ref-type="bibr" rid="B14">Ojala et al., 2002</xref>). <xref ref-type="fig" rid="F3">Figure 3</xref> shows an example of LBP. After extracting the Mel spectrogram, the image representation was converted from RGB to the gray scale. Thereafter, the LBP features were extracted from the gray scale image. We selected a radius of 3 units and eight sampling points as proposed by <xref ref-type="bibr" rid="B26">Sharma et al. (2022</xref>) and normalized it by the <italic>L</italic>2 norm to make features invariant to rotations, resulting in 59 dimensional LBP features.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Example of LBP and LTP encoding from the Mel spectrogram.</p>
</caption>
<graphic xlink:href="frsip-02-986293-g003.tif"/>
</fig>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Local ternary pattern</title>
<p>The local ternary pattern is an extension of the LBP local texture descriptor, which is more discriminant and less sensitive to noise in uniform regions. It has been used for, e.g., speech emotion recognition (<xref ref-type="bibr" rid="B29">S&#xf6;nmez and Varol, 2020</xref>), fall detection (<xref ref-type="bibr" rid="B2">Adnan et al., 2018</xref>), and heart sound classification (<xref ref-type="bibr" rid="B8">Er, 2021</xref>). Instead of representing values through thresholds such as 0 and 1 as in the case of LBP, LTP uses a constant threshold to represent (or convert) pixels into three values: &#x2212; 1, 0, and 1. In order to reduce the feature vector dimension and computational time, each ternary pattern is split into two parts, the upper pattern and the lower pattern, as shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. The first one is the positive part, whereas the second is negative (<xref ref-type="bibr" rid="B32">Tan and Triggs, 2010</xref>). Similar to LBP, the LTP features were computed over Mel spectrogram gray-scale images, with a radius of 3 units; eight sampling points were selected, and 512 dimensional LTP features were utilized.</p>
</sec>
</sec>
<sec id="s2-4">
<title>2.4 Experimental tests</title>
<p>Based on the aforementioned databases, our study focused on different classification tasks to infer COVID-19 contamination, which is described as follows.</p>
<sec id="s2-4-1">
<title>2.4.1 CCS and CSS classification tasks</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; C1: distinguish through cough sounds, users who have declared they tested positive for COVID-19 from users who have not declared a positive test for COVID-19.</p>
</list-item>
<list-item>
<p>&#x2022; C2: distinguish through speech sounds, users who have declared they tested positive for COVID-19 from users who have not declared a positive test for COVID-19.</p>
</list-item>
<list-item>
<p>&#x2022; C3: distinguish through cough and speech sounds, users who have declared they tested positive for COVID-19 from users who have not declared a positive test for COVID-19.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2-4-2">
<title>2.4.2 Cambridge KDD classification tasks</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; K1: distinguish by employing cough sounds, users who have declared they tested positive for COVID-19 (COVID-positive) from users who have not declared a positive test for COVID-19 (non-COVID) and have a clean medical history (nonsmoking and no symptoms.)</p>
</list-item>
<list-item>
<p>&#x2022; K2: distinguish by employing cough sounds, users who have declared they tested positive for COVID-19 and also have declared cough as a symptom (COVID-positive with cough) from users who have declared not to have tested positive for COVID-19 and further have cough as a symptom (non-COVID with cough).</p>
</list-item>
<list-item>
<p>&#x2022; K3: distinguish by employing breath sounds, COVID-positive users from non-COVID users.</p>
</list-item>
<list-item>
<p>&#x2022; K4: distinguish by employing breath sounds, COVID-positive users with cough from non-COVID users with cough.</p>
</list-item>
<list-item>
<p>&#x2022; K5: distinguish by employing cough and breath sounds, COVID-positive users with cough from non-COVID users with cough.</p>
</list-item>
<list-item>
<p>&#x2022; K6: distinguish by employing cough sounds, COVID-positive users with cough from users who have declared not to have tested positive for COVID-19 and have reported asthma and cough as symptoms.</p>
</list-item>
</list>
</p>
</sec>
</sec>
<sec id="s2-5">
<title>2.5 Classification and evaluation</title>
<p>After applying the feature extraction methods to cough, speech, and breath data, the final feature vector is utilized as input for a classifier. For both CCS and CSS databases, the original partitions were maintained: 286 samples for training, 231 for validation, and 208 for testing. On the other hand, the Cambridge KDD dataset was split into 80% for training and 20% for testing. In both databases, a 10-fold cross-validation was utilized to avoid overfitting and underfitting. For both datasets, the support vector machine (SVM) was explored to perform binary classification. As the datasets are highly unbalanced, we utilized the unweighted average recall (UAR) for comparison.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<p>As mentioned previously, two feature extraction methods were evaluated. <xref ref-type="fig" rid="F4">Figure 4</xref> shows the results of all nine experiments for both CCS and CSS and the Cambridge KDD database for cough, speech, and breath classification with the SVM as a classifier for COVID-19 screening.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Respiratory sound classification performance for various modalities (cough, speech, and breath) in terms of the UAR and F1-score. For each experiment (C1&#x2013;C3 and K1&#x2013;K6), two feature extraction methods [the local binary pattern (LBP) and local ternary pattern (LTP)] were evaluated.</p>
</caption>
<graphic xlink:href="frsip-02-986293-g004.tif"/>
</fig>
<p>In the case of CCS and CSS databases, the main objective of both C1 and C2 experiments is to perform a binary classification between COVID-19 positive and negative patients by using cough and speech signals, respectively. Although LBP and LTP-based features produced the same result for the validation set (UAR &#x3d; 80.00%), employing cough sounds (C1), LTP-based features achieved better results for the test set (UAR &#x3d; 74.16%). Regarding C2, where speech samples were evaluated, LBP-based features achieved better results than LTP in both validation (UAR &#x3d; 74.24%) and test (UAR &#x3d; 58.52%) sets. In addition to the original challenges, one more experiment with CCS and CSS datasets was also carried out to investigate if the combination of cough and speech sounds could improve the distinction between COVID-19 and healthy individuals. In this experiment (C3), the results obtained in the test set for both LBP and LTP-based features are close. However, they have a significant difference in the validation set for both the UAR and F1-score.</p>
<p>For the Cambridge KDD dataset, six experiments were conducted (K1 to K6) with cough and/or breath sounds. As previously mentioned, the Cambridge KDD dataset has cough and breath audio samples from individuals who reported cough as a symptom or not. For almost all experiments (except K2 and K6), the LBP-based features achieved better or equal results to LTP in both validation and test sets. Also, to perform a binary classification, we conducted experiments (K1&#x2013;K4) to investigate the impact of COVID-19 screening in patients with cough as a symptom from those with a clean medical history (nonsmoking and no COVID-19 symptoms). Experiment K1 demonstrated good classification by using our approach in COVID-positive and non-COVID individuals (UAR &#x3d; 88.69%). As expected, in experiment K2, a UAR of 100.00% was achieved using LTP-based features in COVID-positive patients with cough and non-COVID individuals with cough. On the other hand, experiments K3 and K4 demonstrated that cough as a symptom is not a major factor in inferring COVID-19, with a UAR of 94.11% for COVID-positive patients and non-COVID individuals, and 95.00% for COVID-positive patients with cough and non-COVID patients with cough since they obtained good performance using breath sounds.</p>
<p>As explained by <xref ref-type="bibr" rid="B26">Sharma et al. (2022</xref>), in experiment K5, we also conducted an experiment using cough and breath audio signals combined to perform a binary classification between COVID-positive patients with cough and non-COVID individuals with cough. A UAR of 97.37% and F1-score of 0.97 were achieved using our LBP-based feature approach. Moreover, these results suggest that combining cough and breath sounds can distinguish better between COVID-19-positive and -negative patients than cough and speech sounds together.</p>
<p>Asthma is one of the most prevalent chronic diseases in the United States, and respiratory infections are frequently thought to be asthma triggers (<xref ref-type="bibr" rid="B9">Esmaeilzadeh et al., 2022</xref>). Patients with acute asthma attacks frequently describe upper respiratory symptoms, including cough, in the days before the exacerbation starts (<xref ref-type="bibr" rid="B18">Pattemore et al., 1992</xref>). Then, we conducted experiment K6 to evaluate if a cough sound from an individual with a respiratory disease (asthma) can be distinguished from a COVID-positive individual with cough. In this experiment, LTP-based features achieved a UAR of 100.00% and F1-score of 1.00 for both validation and test sets.</p>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>Since the COVID-19 outbreak, several research studies have been conducted to infer infection by COVID-19 (<xref ref-type="bibr" rid="B4">Brown et al., 2020</xref>; <xref ref-type="bibr" rid="B23">Schuller et al., 2021</xref>; <xref ref-type="bibr" rid="B36">Zhou et al., 2021</xref>; <xref ref-type="bibr" rid="B17">Pahar et al., 2022</xref>; <xref ref-type="bibr" rid="B19">Pleva et al., 2022</xref>; <xref ref-type="bibr" rid="B26">Sharma et al., 2022</xref>; <xref ref-type="bibr" rid="B34">Villa-Parra et al., 2022</xref>). From the experiments, we can find that the proposed audio texture feature extraction can achieve a good performance in COVID-19 screening. <xref ref-type="bibr" rid="B6">Coppock et al. (2022</xref>) presented a summary of the INTERSPEECH 2021 ComParE. A cough and speech UAR of 75.9% (<xref ref-type="bibr" rid="B5">Casanova et al., 2021</xref>) and 72.1% (<xref ref-type="bibr" rid="B23">Schuller et al., 2021</xref>) was achieved, respectively. Although we reached a slightly lower UAR for cough (UAR &#x3d; 75.54%), it is worth noting that we did not use any data augmentation and deep learning methods. On the other hand, <xref ref-type="bibr" rid="B28">Solera-Ure&#xf1;a et al. (2021</xref>) achieved a UAR of 69.3% without data augmentation using SVM as a classifier. Unlike for cough, we did not achieve a good performance for speech tasks compared to the baseline shown by <xref ref-type="bibr" rid="B23">Schuller et al. (2021</xref>) (UAR &#x3d; 72.1%). <xref ref-type="bibr" rid="B5">Casanova et al. (2021</xref>), when exploring the same approach utilized for cough, had achieved a UAR of 70.3%. <xref ref-type="bibr" rid="B12">Klumpp et al. (2021</xref>) explored Mel spectrograms and various classifiers, such as LSTM, CNN, SVM, and LR, with data augmentation, and a UAR of 64.2% was reached. <xref ref-type="bibr" rid="B4">Brown et al. 
(2020</xref>) explored acoustic features in their research, and AUC-ROC up to 0.82 was achieved for binary classification. It is worth mentioning that various research studies utilized the accuracy (ACC) and area under the curve of receiver operating characteristic (AUC-ROC) as metrics. <xref ref-type="bibr" rid="B26">Sharma et al. (2022</xref>) analyzed cough, speech, and breath sounds collected from smartphones by using spectrograms and textural features (LBP and Haralick&#x2019;s), and an ACC of 98.9% and 72.2% for 2- and 5-class classification tasks were reached, respectively. The authors utilized 120-dimensional features for LBP and 14-dimensional Haralick&#x2019;s features. In contrast, our research utilized 59-dimensional features only. A study conducted by <xref ref-type="bibr" rid="B17">Pahar et al. (2022</xref>) explored deep architecture for COVID-19 detection, and they achieved an AUC-ROC of 0.98, 0.94, and 0.92, respectively, for all three sound classes (cough, breath, and speech). Respiratory audio data are also explored for chronic obstructive pulmonary disease (COPD) detection. <xref ref-type="bibr" rid="B30">Srivastava et al. (2021</xref>) explored MFCC features and achieved an AUC-ROC of 0.89. In addition to research using respiratory audio in COVID-19 screening, various research studies employing chest X-ray images are being conducted, such as the one by <xref ref-type="bibr" rid="B3">Bhatt et al. (2021</xref>). In this work, progressive resizing and transfer learning techniques are explored in normal and COVID-19-infected X-ray images, and an accuracy of up to 100.00% was achieved.</p>
<p>It is to be noted that various research studies explored acoustic features (<xref ref-type="bibr" rid="B4">Brown et al., 2020</xref>; <xref ref-type="bibr" rid="B17">Pahar et al., 2022</xref>), although textural features are still not much explored in audio analysis, particularly for COVID-19 screening, thus opening avenues in respiratory sound analysis. In general, our results show that LBP achieved better performance than LTP. It is worth mentioning that the main advantage of using LBP textural features is the low computational complexity, thus facilitating a mobile implementation of the proposed system to infer COVID-19 using a smartphone.</p>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>In this work, we presented a framework to infer COVID-19 by using cough, speech, and breath audios using textural features. For each respiratory sound, the Mel spectrogram was computed, and two different methods for feature extraction were performed over the Mel spectrogram gray-scale image: local binary patterns and local ternary patterns. We have evaluated the features with an SVM classifier for three different databases. Our study with different feature extraction methods reveals that the LBP-based feature is superior to LTP in most of the experiments. In addition, we have noticed that in a binary classification, cough sounds are better to distinguish between COVID-19-positive and -negative individuals.</p>
<p>As a limitation to our study, we performed the analysis using a small sample size. Although good performance was obtained with our proposal, it may not yet be a reliable standalone alternative for COVID-19 detection. However, it creates new opportunities to develop COVID-19 screening tools for telemedicine and remote monitoring (<xref ref-type="bibr" rid="B26">Sharma et al., 2022</xref>; <xref ref-type="bibr" rid="B34">Villa-Parra et al., 2022</xref>). In future works, we will explore data augmentation techniques (<xref ref-type="bibr" rid="B22">Saldanha et al., 2022</xref>), transfer learning (<xref ref-type="bibr" rid="B3">Bhatt et al., 2021</xref>), and interpretable deep learning models (<xref ref-type="bibr" rid="B11">Joshi et al., 2021</xref>) to improve the interpretability and usability of our framework to help COVID-19 diagnosis.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The data analyzed in this study are subject to the following licenses/restrictions: only credentialed users who sign the data transfer agreement can access the files. Requests to access these datasets should be directed to <ext-link ext-link-type="uri" xlink:href="https://www.covid-19-sounds.org/">https://www.covid-19-sounds.org/</ext-link>.</p>
</sec>
<sec id="s7">
<title>Author contributions</title>
<p>Experimental design, software, and writing&#x2014;original draft: LS; writing&#x2014;reviewing and editing: CV, LL, DD, and EC; experimental design, supervision, and writing&#x2014;reviewing and editing: TB; and experimental design, supervision, writing&#x2014;reviewing and editing, and funding: SK. All authors contributed to manuscript revision and read and approved the submitted version.</p>
</sec>
<sec id="s8">
<title>Funding</title>
<p>This research was funded by Coordena&#xe7;&#xe3;o de Aperfei&#xe7;oamento de Pessoal de N&#xed;vel Superior (CAPES/Brazil): 012/2020 and the Natural Sciences and Engineering Research Council of the Canada Discovery Grant RGPIN-2020-04628.</p>
</sec>
<ack>
<p>The authors acknowledge the financial support from Global Affairs Canada (Canadian scholarship), CAPES/Brazil (postdoctoral fellow scholarship), CNPq/Brazil (PhD and researcher scholarships), and FACITEC/Brazil (PhD scholarship).</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abidin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Togneri</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sohel</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Spectrotemporal analysis using local binary pattern variants for acoustic scene classification</article-title>. <source>IEEE/ACM Trans. Audio Speech Lang. Process.</source> <volume>26</volume>, <fpage>2112</fpage>&#x2013;<lpage>2121</lpage>. <pub-id pub-id-type="doi">10.1109/TASLP.2018.2854861</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Adnan</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Irtaza</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Aziz</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ullah</surname>
<given-names>M. O.</given-names>
</name>
<name>
<surname>Javed</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mahmood</surname>
<given-names>M. T.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Fall detection through acoustic local ternary patterns</article-title>. <source>Appl. Acoust.</source> <volume>140</volume>, <fpage>296</fpage>&#x2013;<lpage>300</lpage>. <pub-id pub-id-type="doi">10.1016/j.apacoust.2018.06.013</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhatt</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ganatra</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kotecha</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Covid-19 pulmonary consolidations detection in chest x-ray using progressive resizing and transfer learning techniques</article-title>. <source>Heliyon</source> <volume>7</volume>, <fpage>e07211</fpage>. <pub-id pub-id-type="doi">10.1016/j.heliyon.2021.e07211</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Brown</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chauhan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Grammenos</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hasthanasombat</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Spathis</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <source>Exploring automatic diagnosis of covid-19 from crowdsourced respiratory sound data</source>. <comment>
<italic>arXiv preprint arXiv:2006.05919</italic>
</comment>. </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Casanova</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Candido</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fernandes</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Finger</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gris</surname>
<given-names>L. R. S.</given-names>
</name>
<name>
<surname>Ponti</surname>
<given-names>M. A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). &#x201c;<article-title>Transfer learning and data augmentation techniques to the Covid-19 identification tasks in compare 2021</article-title>,&#x201d; in <source>22nd annual conference of the international speech communication association</source> (<source>INTERSPEECH</source>), <fpage>4301</fpage>&#x2013;<lpage>4305</lpage>. </citation>
</ref>
<ref id="B6">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Coppock</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Akman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bergler</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gerczuk</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chauhan</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <source>A summary of the compare covid-19 challenges</source>. <comment>arXiv preprint arXiv:2202.08981</comment>. </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Demir</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sengur</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cummins</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Amiriparian</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schuller</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Low level texture features for snore sound discrimination</article-title>,&#x201d; in <source>40th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)</source> (<publisher-name>IEEE</publisher-name>), <fpage>413</fpage>&#x2013;<lpage>416</lpage>. </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Er</surname>
<given-names>M. B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Heart sounds classification using convolutional neural network with 1d-local binary pattern and 1d-local ternary pattern features</article-title>. <source>Appl. Acoust.</source> <volume>180</volume>, <fpage>108152</fpage>. <pub-id pub-id-type="doi">10.1016/j.apacoust.2021.108152</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Esmaeilzadeh</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sanaei Dashti</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mortazavi</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Fatemian</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Vali</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Persistent cough and asthma-like symptoms post Covid-19 hospitalization in children</article-title>. <source>BMC Infect. Dis.</source> <volume>22</volume>, <fpage>244</fpage>&#x2013;<lpage>248</lpage>. <pub-id pub-id-type="doi">10.1186/s12879-022-07252-2</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>Johns Hopkins University</collab>
</person-group> (<year>2022</year>). <source>Covid-19 dashboard</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://coronavirus.jhu.edu/map.html">https://coronavirus.jhu.edu/map.html</ext-link> (Accessed July 1, 2022)</comment>. </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Joshi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Walambe</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kotecha</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A review on explainability in multimodal deep neural nets</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>59800</fpage>&#x2013;<lpage>59821</lpage>. <pub-id pub-id-type="doi">10.1109/access.2021.3070212</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Klumpp</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bocklet</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Arias-Vergara</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>V&#xe1;squez-Correa</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>P&#xe9;rez-Toro</surname>
<given-names>P. A.</given-names>
</name>
<name>
<surname>Bayerl</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>The phonetic footprint of Covid-19?</article-title> <source>Interspeech</source>, <fpage>441</fpage>&#x2013;<lpage>445</lpage>. <pub-id pub-id-type="doi">10.21437/Interspeech.2021-1488</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nanni</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Maguolo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Brahnam</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Paci</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An ensemble of convolutional neural networks for audio classification</article-title>. <source>Appl. Sci.</source> <volume>11</volume>, <fpage>5796</fpage>. <pub-id pub-id-type="doi">10.3390/app11135796</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ojala</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Pietikainen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Maenpaa</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Multiresolution gray-scale and rotation invariant texture classification with local binary patterns</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>24</volume>, <fpage>971</fpage>&#x2013;<lpage>987</lpage>. <pub-id pub-id-type="doi">10.1109/tpami.2002.1017623</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Oliveira</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Oliveira</surname>
<given-names>L. C. d.</given-names>
</name>
<name>
<surname>Sabino</surname>
<given-names>E. C.</given-names>
</name>
<name>
<surname>Okay</surname>
<given-names>T. S.</given-names>
</name>
</person-group> (<year>2020</year>). <source>Sars-cov-2 and the covid-19 disease: a mini review on diagnostic methods</source>, <volume>62</volume>. <publisher-loc>S&#x00E3;o Paulo</publisher-loc>: <publisher-name>Revista do Instituto de Medicina Tropical de S&#xe3;o Paulo</publisher-name>. <pub-id pub-id-type="doi">10.1590/s1678-9946202062044</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pahar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Klopper</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Reeve</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Warren</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Theron</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Niesler</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Automatic cough classification for tuberculosis screening in a real-world environment</article-title>. <source>Physiol. Meas.</source> <volume>42</volume>, <fpage>105014</fpage>. <pub-id pub-id-type="doi">10.1088/1361-6579/ac2fb8</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pahar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Klopper</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Warren</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Niesler</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Covid-19 detection in cough, breath and speech using deep transfer learning and bottleneck features</article-title>. <source>Comput. Biol. Med.</source> <volume>141</volume>, <fpage>105153</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2021.105153</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pattemore</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Johnston</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bardin</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>1992</year>). <article-title>Viruses as precipitants of asthma symptoms. i. epidemiology</article-title>. <source>Clin. Exp. Allergy</source> <volume>22</volume>, <fpage>325</fpage>&#x2013;<lpage>336</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-2222.1992.tb03094.x</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Pleva</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Martens</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Juhar</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Automated Covid-19 respiratory symptoms analysis from speech and cough</article-title>,&#x201d; in <source>2022 ieee 20th jubilee World symposium on applied machine intelligence and informatics (sami)</source> (<publisher-name>IEEE</publisher-name>). </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pramono</surname>
<given-names>R. X. A.</given-names>
</name>
<name>
<surname>Imtiaz</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Rodriguez-Villegas</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>A cough-based algorithm for automatic diagnosis of pertussis</article-title>. <source>PloS one</source> <volume>11</volume>, <fpage>e0162128</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0162128</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rai</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>B. K.</given-names>
</name>
<name>
<surname>Deekshit</surname>
<given-names>V. K.</given-names>
</name>
<name>
<surname>Karunasagar</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Karunasagar</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Detection technologies and recent developments in the diagnosis of Covid-19 infection</article-title>. <source>Appl. Microbiol. Biotechnol.</source> <volume>105</volume>, <fpage>441</fpage>&#x2013;<lpage>455</lpage>. <pub-id pub-id-type="doi">10.1007/s00253-020-11061-5</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saldanha</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chakraborty</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Patil</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kotecha</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Nayyar</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Data augmentation using variational autoencoders for improvement of respiratory disease classification</article-title>. <source>PloS one</source> <volume>17</volume>, <fpage>e0266467</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0266467</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Schuller</surname>
<given-names>B. W.</given-names>
</name>
<name>
<surname>Batliner</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bergler</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mascolo</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lefter</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <source>The interspeech 2021 computational paralinguistics challenge: Covid-19 cough, covid-19 speech, escalation &#x26; primates</source>. <comment>
<italic>arXiv preprint arXiv:2102.13468</italic>
</comment>. </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sengupta</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Sahidullah</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Saha</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Lung sound classification using local binary pattern</article-title>. </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharma</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Prasad</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Umapathy</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Krishnan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Screening and analysis of specific language impairment in young children by analyzing the textures of speech signal</article-title>. <source>Annu. Int. Conf. IEEE Eng. Med. Biol. Soc.</source>, <fpage>964</fpage>&#x2013;<lpage>967</lpage>. <pub-id pub-id-type="doi">10.1109/EMBC44109.2020.9176056</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharma</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Umapathy</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Krishnan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Audio texture analysis of Covid-19 cough, breath, and speech sounds</article-title>. <source>Biomed. Signal Process. Control</source> <volume>76</volume>, <fpage>103703</fpage>. <pub-id pub-id-type="doi">10.1016/J.BSPC.2022.103703</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharma</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X. P.</given-names>
</name>
<name>
<surname>Umapathy</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Krishnan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Audio texture and age-wise analysis of disordered speech in children having specific language impairment</article-title>. <source>Biomed. Signal Process. Control</source> <volume>66</volume>, <fpage>102471</fpage>. <pub-id pub-id-type="doi">10.1016/J.BSPC.2021.102471</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Solera-Ure&#xf1;a</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Botelho</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Teixeira</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Rolland</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Abad</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Trancoso</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Transfer learning-based cough representations for automatic detection of Covid-19</article-title>. <source>Interspeech</source>, <fpage>436</fpage>&#x2013;<lpage>440</lpage>. <pub-id pub-id-type="doi">10.21437/Interspeech.2021-1702</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>S&#xf6;nmez</surname>
<given-names>Y. l.</given-names>
</name>
<name>
<surname>Varol</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A speech emotion recognition model based on multi-level local binary and local ternary patterns</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>190784</fpage>&#x2013;<lpage>190796</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2020.3031763</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Srivastava</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jain</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Miranda</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Patil</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pandya</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kotecha</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Deep learning based respiratory sound analysis for detection of chronic obstructive pulmonary disease</article-title>. <source>PeerJ Comput. Sci.</source> <volume>7</volume>, <fpage>e369</fpage>. <pub-id pub-id-type="doi">10.7717/peerj-cs.369</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Suppakitjanusant</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Sungkanuparph</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wongsinin</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Virapongsiri</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kasemkosin</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Chailurkit</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Identifying individuals with recent Covid-19 through voice classification using deep learning</article-title>. <source>Sci. Rep.</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-98742-x</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Triggs</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Enhanced local texture feature sets for face recognition under difficult lighting conditions</article-title>. <source>IEEE Trans. Image Process.</source> <volume>19</volume>, <fpage>1635</fpage>&#x2013;<lpage>1650</lpage>. <pub-id pub-id-type="doi">10.1109/TIP.2010.2042645</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Verde</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Pietro</surname>
<given-names>G. D.</given-names>
</name>
<name>
<surname>Ghoneim</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Alrashoud</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Al-Mutib</surname>
<given-names>K. N.</given-names>
</name>
<name>
<surname>Sannino</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Exploring the use of artificial intelligence techniques to detect the presence of coronavirus Covid-19 through speech and voice analysis</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>65750</fpage>&#x2013;<lpage>65757</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3075571</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Villa-Parra</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Criollo</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Valad&#xe3;o</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Coelho</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lampier</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Towards multimodal equipment to help in the diagnosis of Covid-19 using machine learning algorithms</article-title>. <source>Sensors</source> <volume>22</volume>, <fpage>4341</fpage>. <pub-id pub-id-type="doi">10.3390/s22124341</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<collab>World Health Organization</collab> (<year>2020</year>). <source>Who director-general&#x2019;s opening remarks at the media briefing on covid-19 - 11 march 2020</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.who.int/director-general/speeches/detail/who-director-general-s-opening-remarks-at-the-media-briefing-on-covid-19&#x2014;11-march-2020">https://www.who.int/director-general/speeches/detail/who-director-general-s-opening-remarks-at-the-media-briefing-on-covid-19&#x2014;11-march-2020</ext-link> (Accessed June 30, 2022)</comment>.</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Shan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Cough recognition based on mel-spectrogram and convolutional neural network</article-title>. <source>Front. Robot. AI</source> <volume>8</volume>, <fpage>112</fpage>. <pub-id pub-id-type="doi">10.3389/frobt.2021.580080</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>