<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychiatry</journal-id>
<journal-title>Frontiers in Psychiatry</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychiatry</abbrev-journal-title>
<issn pub-type="epub">1664-0640</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyt.2023.1079448</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Psychiatry</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Exploring the ability of vocal biomarkers in distinguishing depression from bipolar disorder, schizophrenia, and healthy controls</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Pan</surname>
<given-names>Wei</given-names>
</name>
<xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<xref rid="aff3" ref-type="aff"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2068265/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Deng</surname>
<given-names>Fusong</given-names>
</name>
<xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Xianbin</given-names>
</name>
<xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<xref rid="aff3" ref-type="aff"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2365234/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hang</surname>
<given-names>Bowen</given-names>
</name>
<xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<xref rid="aff3" ref-type="aff"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Wenwei</given-names>
</name>
<xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<xref rid="aff3" ref-type="aff"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2295839/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhu</surname>
<given-names>Tingshao</given-names>
</name>
<xref rid="aff5" ref-type="aff"><sup>5</sup></xref>
<xref rid="aff6" ref-type="aff"><sup>6</sup></xref>
<xref rid="c001" ref-type="corresp"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/964624/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Key Laboratory of Adolescent Cyberpsychology and Behavior (CCNU), Ministry of Education</institution>, <addr-line>Wuhan</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>School of Psychology, Central China Normal University</institution>, <addr-line>Wuhan</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Key Laboratory of Human Development and Mental Health of Hubei Province</institution>, <addr-line>Wuhan</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Wuhan Wuchang Hospital, Wuchang Hospital Affiliated to Wuhan University of Science and Technology</institution>, <addr-line>Wuhan</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>Institute of Psychology, Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff6"><sup>6</sup><institution>CAS Key Laboratory of Behavioral Science, Institute of Psychology, Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<author-notes>
<fn id="fn0001" fn-type="edited-by"><p>Edited by: Ronghui Liu, University of Chinese Academy of Sciences, China</p></fn>
<fn id="fn0002" fn-type="edited-by"><p>Reviewed by: Paul Thuras, United States Department of Veterans Affairs, United States; Ang Li, Beijing Forestry University, China</p></fn>
<corresp id="c001">&#x002A;Correspondence: Tingshao Zhu, <email>tszhu@psych.ac.cn</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>20</day>
<month>07</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1079448</elocation-id>
<history>
<date date-type="received">
<day>25</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>06</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2023 Pan, Deng, Wang, Hang, Zhou and Zhu.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Pan, Deng, Wang, Hang, Zhou and Zhu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Background</title>
<p>Vocal features have been exploited to distinguish depression from healthy controls. While there have been some claims for success, the degree to which changes in vocal features are specific to depression has not been systematically studied. Hence, we examined the performances of vocal features in differentiating depression from bipolar disorder (BD), schizophrenia and healthy controls, as well as pairwise classifications for the three disorders.</p>
</sec>
<sec>
<title>Methods</title>
<p>We sampled 32 bipolar disorder patients, 106 depression patients, 114 healthy controls, and 20 schizophrenia patients. We extracted i-vectors from Mel-frequency cepstrum coefficients (MFCCs), and built logistic regression models with ridge regularization and 5-fold cross-validation on the training set, then applied models to the test set. There were seven classification tasks: any disorder versus healthy controls; depression versus healthy controls; BD versus healthy controls; schizophrenia versus healthy controls; depression versus BD; depression versus schizophrenia; BD versus schizophrenia.</p>
</sec>
<sec>
<title>Results</title>
<p>The area under curve (AUC) score for classifying depression and bipolar disorder was 0.5 (<italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.44). For other comparisons, the AUC scores ranged from 0.75 to 0.92, and the <italic>F</italic>-<italic>scores</italic> ranged from 0.73 to 0.91. The model performance (AUC) of classifying depression and bipolar disorder was significantly worse than that of classifying bipolar disorder and schizophrenia (corrected <italic>p</italic>&#x2009;&#x003C;&#x2009;0.05), while there were no significant differences in the remaining pairwise comparisons of the 7 classification tasks.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>Vocal features showed discriminatory potential in classifying depression and the healthy controls, as well as between depression and other mental disorders. Future research should systematically examine the mechanisms of voice features in distinguishing depression from other mental disorders and develop more sophisticated machine learning models so that voice can assist clinical diagnosis better.</p>
</sec>
</abstract>
<kwd-group>
<kwd>depression</kwd>
<kwd>healthy controls</kwd>
<kwd>schizophrenia</kwd>
<kwd>bipolar disorder</kwd>
<kwd>i-vectors</kwd>
<kwd>logistic regression</kwd>
<kwd>MFCCs</kwd>
</kwd-group>
<counts>
<fig-count count="0"/>
<table-count count="9"/>
<equation-count count="1"/>
<ref-count count="67"/>
<page-count count="9"/>
<word-count count="7181"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Digital Mental Health</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec5">
<title>Introduction</title>
<p>The identification and diagnosis of depression through clinical interviews are often slow and unreliable (<xref ref-type="bibr" rid="ref1">1</xref>&#x2013;<xref ref-type="bibr" rid="ref4">4</xref>). About half of cases go unrecognized: in a meta-analysis of 41 studies, recognition accuracy of depression by general practitioners was 47.3% (<xref ref-type="bibr" rid="ref5">5</xref>). Therefore, accurate and fast ways to identify cases of depression will have major clinical benefits.</p>
<p>Novel applications of computational methods are making some inroads into this problem. For example, a review of 14 studies indicated that both sensitivity and specificity of diagnostic performance of deep learning models were higher than that of health-care professionals (<xref ref-type="bibr" rid="ref6">6</xref>). In the last decade, there has been growing interest in exploiting vocal biomarkers to identify depression with machine learning methods to investigate whether voice can be used as an auxiliary tool to assist clinical diagnosis (<xref ref-type="bibr" rid="ref7">7</xref>&#x2013;<xref ref-type="bibr" rid="ref11">11</xref>). Previous research mainly focused on examining the ability of vocal features in classifying individuals with depression and healthy population (<xref ref-type="bibr" rid="ref12">12</xref>&#x2013;<xref ref-type="bibr" rid="ref17">17</xref>), and <italic>F</italic>-measure of relevant classifiers reached 0.9 (<xref ref-type="bibr" rid="ref12">12</xref>). These findings suggest that vocal biomarkers may have discriminatory potential in identifying depression. However, the differential diagnosis is complicated by the presence of other mental disorders.</p>
<p>The prevalence of mental disorders, according to a study in China, showed that the weighted prevalence of any disorder (excluding dementia) was 16.6% (95% CI 13.0&#x2013;20.2) during the 12&#x2009;months before the interview (<xref ref-type="bibr" rid="ref18">18</xref>). Clinically, the psychiatric diagnosis necessitates distinguishing depressed individuals not only from the healthy ones, but also from other mental illnesses with similar mood symptoms or similar voice patterns. It needs to be stressed that voice conveys emotion-related information. In the field of affective computing, voice features have demonstrated their ability to recognize different kinds of emotions (<xref ref-type="bibr" rid="ref19">19</xref>, <xref ref-type="bibr" rid="ref20">20</xref>). Both bipolar disorder (BD) and schizophrenia exhibit symptoms comparable to depression. According to the Diagnostic and Statistical Manual of Mental Disorders, Fourth Edition (DSM-IV) (<xref ref-type="bibr" rid="ref21">21</xref>), BD involves both depressive and manic episodes, while schizophrenia patients with negative symptoms report anhedonia (loss of ability to experience pleasure). The specificity of alterations in vocal characteristics to depression has not been rigorously investigated.</p>
<p>Variations in vocal features have been observed in schizophrenia and BD patients. Acoustic analyses of speech in schizophrenia have revealed subtle aberrancies in pitch variability associated with flat affect, as well as more pronounced deviations in properties such as percentage of speaking time, speech rate, and pause duration that correlate with symptoms of alogia and blunted affect (<xref ref-type="bibr" rid="ref7">7</xref>). Espinola and her colleagues (<xref ref-type="bibr" rid="ref22">22</xref>) constructed classification models based on speech samples from 31 individuals (20 of whom had a prior diagnosis of schizophrenia and 11 healthy controls). The classifiers attained an accuracy of 91.76% in distinguishing between groups. Regarding BD, studies have utilized vocal features to predict patients&#x2019; emotional states (e.g., depressive, manic, mixed states). Classification analyses yielded an area under curve (AUC) of 0.89 (<xref ref-type="bibr" rid="ref9">9</xref>, <xref ref-type="bibr" rid="ref10">10</xref>). Another study investigated whether vocal features acquired via verbal fluency tasks could accurately differentiate mixed states in BD using machine learning methods. And results showed that for depressive versus mixed depressive episodes, the <italic>F</italic>-measure was 0.86, while for hypomanic versus mixed hypomanic episodes, the <italic>F</italic>-measure was 0.75 (<xref ref-type="bibr" rid="ref23">23</xref>). These studies showed that voice features may also be informative for other psychiatric diagnosis. It should be noted that several studies have examined the utility of vocal features in developing classifiers for several mental disorders. 
A study (<xref ref-type="bibr" rid="ref24">24</xref>) employed polytomous logistic regression analysis of vocal features to discriminate among healthy controls (<italic>n</italic>&#x2009;=&#x2009;23), individuals with bipolar disorder (<italic>n</italic>&#x2009;=&#x2009;8), and those with major depressive disorder (<italic>n</italic>&#x2009;=&#x2009;14). The model attained 90.79% accuracy in classifying participants into the three diagnostic groups. Another study (<xref ref-type="bibr" rid="ref25">25</xref>) proposed a methodology to support the diagnosis of several mental disorders using vocal acoustic analysis and machine learning. The results showed that random forests with 300 trees achieved the best classification performance (75.27% for accuracy) for the simultaneous detection of major depressive disorder (<xref ref-type="bibr" rid="ref26">26</xref>), schizophrenia (<xref ref-type="bibr" rid="ref20">20</xref>), BD (<xref ref-type="bibr" rid="ref15">15</xref>), generalized anxiety disorder (<xref ref-type="bibr" rid="ref4">4</xref>), and healthy controls (<xref ref-type="bibr" rid="ref12">12</xref>). However, the datasets of the above two studies were imbalanced for each group. The imbalanced dataset problems become more complicated in multi-class imbalanced classification tasks, in which there may be multiple minority and majority classes that cause skewed data distribution. Machine learning algorithms tend to favor the majority class samples, hence damaging the multi-classification results (<xref ref-type="bibr" rid="ref27">27</xref>, <xref ref-type="bibr" rid="ref28">28</xref>). Moreover, extensive comparisons between mental disorders and healthy controls may offer more information about the effectiveness of voice for clinically complex differential diagnosis.</p>
<p>Various speech features are indicative of depression. Mel-frequency cepstrum coefficients (MFCCs) constitute the most prevalent vocal features employed in speech recognition systems and psychiatric condition classification models (<xref ref-type="bibr" rid="ref26">26</xref>). MFCCs are obtained by extracting frequency spectral features of the speech signal using the short-time power spectrum, mapping these features onto the Mel scale to better present auditory characteristics, and then obtaining MFCC coefficients through cepstrum analysis that can characterize the speech envelope (<xref ref-type="bibr" rid="ref12">12</xref>, <xref ref-type="bibr" rid="ref29">29</xref>). Multiple studies have demonstrated the utility of MFCCs in developing classification models for depression detection (<xref ref-type="bibr" rid="ref30">30</xref>&#x2013;<xref ref-type="bibr" rid="ref32">32</xref>). For example, Di et al. (<xref ref-type="bibr" rid="ref29">29</xref>) employed MFCCs to classify major depression patients and healthy individuals, area under curve (AUC) reached 0.8.</p>
<p>The identity vector (i-vector) approach, grounded within the total variability framework, represents the state-of-the-art technique for speaker verification (<xref ref-type="bibr" rid="ref12">12</xref>, <xref ref-type="bibr" rid="ref29">29</xref>, <xref ref-type="bibr" rid="ref32">32</xref>, <xref ref-type="bibr" rid="ref33">33</xref>). The total variability framework offers an effective means of capturing speaker- and channel-related variability in a low dimensional subspace. i-vectors are highly informative for encoding cepstral variability. Classification models based on i-vectors demonstrated capacity for identifying depression with high accuracy. For instance, prior work found i-vector based model outperformed a baseline model defined by KL-means supervectors (<xref ref-type="bibr" rid="ref32">32</xref>). Nasir and his colleagues (<xref ref-type="bibr" rid="ref33">33</xref>) used i-vectors to investigate various audio and visual features for classification, reporting high accuracy with i-vector modeling of MFCC features. Indeed, one study demonstrated a 40% improvement in predictive accuracy (<italic>F</italic>-<italic>score</italic>) with the i-vector methodology (<xref ref-type="bibr" rid="ref12">12</xref>). And Di et al. (<xref ref-type="bibr" rid="ref29">29</xref>) observed a 14% enhancement in model performance (AUC) with i-vectors relative to MFCCs alone. Although the participant cohorts were exclusively female in both studies, the results demonstrated the promise of i-vectors for enhancing the accuracy of machine learning models for depression classification.</p>
<p>The objective of this study was to evaluate the efficacy of vocal features as differential diagnostic markers for depression compared to other psychiatric disorders. Three binary classification paradigms were employed in total: (1) the capacity of voice features to distinguish any psychiatric condition (depression, bipolar disorder, schizophrenia) versus healthy controls at baseline; (2) the ability of voice features to differentiate a specific psychiatric illness from healthy controls; (3) the capability of vocal features to distinguish between discrete psychiatric disorders in a pairwise manner. Among these paradigms, the first one was served as a baseline to determine whether the dataset achieved performance commensurate with existing research, as well as a benchmark for model performance under other framework conditions. The second and third paradigms were employed to systematically evaluate the capacity of vocal characteristics to distinguish between case and control groups.</p>
</sec>
<sec sec-type="methods" id="sec6">
<title>Methods</title>
<sec id="sec7">
<title>Participants</title>
<p>All participants were randomly recruited. All participants were Chinese aged 18 to 59&#x2009;years. A diagnosis of primary psychiatric illness was established for all patients using the Diagnostic and Statistical Manual of Mental Disorders (DSM-IV) (<xref ref-type="bibr" rid="ref21">21</xref>) by psychiatrists. Participants clustered into four categories based on diagnosis: healthy controls, depression, BD and schizophrenia. Clinical staging was further specified: depression subjects were actively symptomatic, BD patients were euthymic, and schizophrenia patients were in remission. Healthy controls were openly recruited. Patients with comorbid psychiatric conditions were excluded for all diagnostic categories. And the general exclusion criteria across all participants were: physical illnesses, pregnancy and lactating, substance abuse within 12&#x2009;months. Demographic variables were age, gender and education level. See <xref rid="tab1" ref-type="table">Table 1</xref>.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption><p>Demographic information about each group.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Groups</th>
<th align="center" valign="top">Gender</th>
<th align="center" valign="top">Age</th>
<th align="center" valign="top">Education</th>
<th align="center" valign="top">Occupation</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="2">Health</td>
<td align="center" valign="top">58 males</td>
<td align="char" valign="top" char="&#x00B1;">34.88 &#x00B1; 10.54</td>
<td align="char" valign="top" char="&#x00B1;">7.78 &#x00B1; 2.60</td>
<td align="char" valign="top" char="&#x00B1;">4.13 &#x00B1; 3.48</td>
</tr>
<tr>
<td align="center" valign="top">57 females</td>
<td align="char" valign="top" char="&#x00B1;">34.73 &#x00B1; 9.69</td>
<td align="char" valign="top" char="&#x00B1;">7.47 &#x00B1; 2.53</td>
<td align="char" valign="top" char="&#x00B1;">3.08 &#x00B1; 3.13</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">Depression</td>
<td align="center" valign="top">53 males</td>
<td align="char" valign="top" char="&#x00B1;">32.67 &#x00B1; 8.62</td>
<td align="char" valign="top" char="&#x00B1;">6.70 &#x00B1; 2.36</td>
<td align="char" valign="top" char="&#x00B1;">2.34 &#x00B1; 2.76</td>
</tr>
<tr>
<td align="center" valign="top">70 females</td>
<td align="char" valign="top" char="&#x00B1;">34.31 &#x00B1; 11.22</td>
<td align="char" valign="top" char="&#x00B1;">7.69 &#x00B1; 2.25</td>
<td align="char" valign="top" char="&#x00B1;">3.32 &#x00B1; 3.20</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">Bipolar</td>
<td align="center" valign="top">16 males</td>
<td align="char" valign="top" char="&#x00B1;">30.06 &#x00B1; 10.46</td>
<td align="char" valign="top" char="&#x00B1;">6.75 &#x00B1; 2.61</td>
<td align="char" valign="top" char="&#x00B1;">2.55 &#x00B1; 2.74</td>
</tr>
<tr>
<td align="center" valign="top">21 females</td>
<td align="char" valign="top" char="&#x00B1;">33.10 &#x00B1; 10.59</td>
<td align="char" valign="top" char="&#x00B1;">7.02 &#x00B1; 2.16</td>
<td align="char" valign="top" char="&#x00B1;">4.93 &#x00B1; 3.14</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">Schizophrenia</td>
<td align="center" valign="top">10 males</td>
<td align="char" valign="top" char="&#x00B1;">27.86 &#x00B1; 7.14</td>
<td align="char" valign="top" char="&#x00B1;">6.81 &#x00B1; 2.33</td>
<td align="char" valign="top" char="&#x00B1;">3.84 &#x00B1; 3.34</td>
</tr>
<tr>
<td align="center" valign="top">10 females</td>
<td align="char" valign="top" char="&#x00B1;">30.81 &#x00B1; 7.61</td>
<td align="char" valign="top" char="&#x00B1;">6.41 &#x00B1; 2.28</td>
<td align="char" valign="top" char="&#x00B1;">4.64 &#x00B1; 3.41</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec8">
<title>Measures</title>
<p>Four vocal tasks were employed for data collection: video watching (VW), text reading (TR), question answering (QA), picture description (PD). Each task incorporated positive, negative, and neutral emotional primes to comprehensively represent existing research paradigms (<xref ref-type="bibr" rid="ref34">34</xref>&#x2013;<xref ref-type="bibr" rid="ref38">38</xref>). In VW, participants viewed video clips then described the most memorable scenes or figures. For QA, participants provided spoken answers to nine questions (three questions/emotion), e.g., &#x201C;Please share your most wonderful experience and describe it in detail&#x201D;. In TR, participants read three 140-word paragraphs aloud. For PD, participants described facial expressions and image content from the Chinese Facial Affective Picture System and the Chinese Affective Picture System (three facial affective pictures and three affective pictures for three emotion primes), respectively. Twenty-one voice recordings were collected from each participant. The emotional priming effects of these tasks were validated in previous research (<xref ref-type="bibr" rid="ref37">37</xref>, <xref ref-type="bibr" rid="ref38">38</xref>). Research also indicates that this dataset affords stable prediction accuracies across emotions and tasks (<xref ref-type="bibr" rid="ref15">15</xref>, <xref ref-type="bibr" rid="ref35">35</xref>, <xref ref-type="bibr" rid="ref37">37</xref>, <xref ref-type="bibr" rid="ref38">38</xref>).</p>
<p>All participants were seated 1&#x2009;m from a 21-inch monitor. Instructions were displayed on-screen. Speech was recorded 50&#x2009;cm distant using a professional condenser microphone (Neumann TLM102, Germany) and a voice recorder (RME Fireface UCX, Germany). The experimenter controlled recording initiation and termination for each participant to exclude the experimenter&#x2019;s speech from recordings. Participants were asked to complete all tasks in random order. Ambient noise was under 60&#x2009;dB. Recordings less than 10 s were excluded. Recording duration details were displayed in <xref rid="tab2" ref-type="table">Table 2</xref>. The speech was Mandarin Chinese. Recordings were collected with a sampling rate of 44.1&#x2009;kHz and 24-bit. Informed consent was obtained in writing pre-experiment. This study was part of a national project and was approved by the Institutional Review Board (IRB) of Institute of Psychology, Chinese Academy of Sciences.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption><p>Duration descriptions of voice recordings in each group(s).</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Groups</th>
<th align="center" valign="top">Min</th>
<th align="center" valign="top">Max</th>
<th align="center" valign="top">M&#x2009;&#x00B1;&#x2009;SD</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Health</td>
<td align="center" valign="top">10</td>
<td align="center" valign="top">188</td>
<td align="char" valign="top" char="&#x00B1;">27.8 &#x00B1; 17.99</td>
</tr>
<tr>
<td align="left" valign="top">Depression</td>
<td align="center" valign="top">10</td>
<td align="center" valign="top">164</td>
<td align="char" valign="top" char="&#x00B1;">29.4 &#x00B1; 17.59</td>
</tr>
<tr>
<td align="left" valign="top">Bipolar</td>
<td align="center" valign="top">10</td>
<td align="center" valign="top">156</td>
<td align="char" valign="top" char="&#x00B1;">30.77 &#x00B1; 17.74</td>
</tr>
<tr>
<td align="left" valign="top">Schizophrenia</td>
<td align="center" valign="top">10</td>
<td align="center" valign="top">149</td>
<td align="char" valign="top" char="&#x00B1;">30.15 &#x00B1; 20.83</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec9">
<title>Data analysis</title>
<sec id="sec10">
<title>Preprocessing</title>
<p>A total of seven classification tasks were employed: any disorder versus healthy controls; depression versus healthy controls; BD versus healthy controls; schizophrenia versus healthy controls; depression versus BD; depression versus schizophrenia; BD versus schizophrenia (AH, DH, BH, SH, DB, DS, BS). In each task, either the mental disorder group or depression group was designated as the case group, with the other constituting the control group.</p>
<p>For model building, the data were randomly split into training set (70%) and test set (30%). Given the small sample size, group differences in demographics were assessed using permutation test (a permutation t test), a nonparametric method suitable for small samples with unknown distribution. Many parametric tests have their corresponding permutation test versions employing the same test statistic but deriving <italic>p</italic>-values from the sample-specific permutation distribution of that statistic, rather than from the theoretical distribution derived from the parametric assumption (<xref ref-type="bibr" rid="ref39">39</xref>).</p>
<p>Previous research indicates demographic factors confound the detection of depression from voice features (<xref ref-type="bibr" rid="ref15">15</xref>). It was therefore critical to control for demographics to isolate the role of vocal features. Age, gender and education differences between groups were evaluated in the training set. Owing to the small, uneven group sizes, matching was performed twice: (1) matched gender within categories through exact matching under the guidance of random sampling (a coarsened matching method for binary variables) (<xref ref-type="bibr" rid="ref40">40</xref>) to match the number of males and females according to the smaller number within the group by the sample function in R (<xref ref-type="bibr" rid="ref41">41</xref>). For example, 16 females were randomly selected in the BD group. After matching, there were 32 BD patients; 106 depression patients, 114 healthy ones, and 20 schizophrenia patients with unbiased sex ratios in each category. (2) Case-control matching within each classification task, e.g., 32 BD patients and 32 healthy controls in the model of classifying the BD versus healthy control task. Ultimately, the demographics of cases and controls were preliminarily balanced for each task.</p>
</sec>
</sec>
<sec id="sec11">
<title>Mel frequency cepstral coefficients</title>
<p>Mel Frequency Cepstral Coefficients (MFCCs) were extracted by MATLAB R2020b (<xref ref-type="bibr" rid="ref42">42</xref>) with a window size of 25&#x2009;ms, a window shift of 10&#x2009;ms, a pre-emphasis filter with coefficient 0.97, and a sinusoidal lifter with coefficient 22 (<xref ref-type="bibr" rid="ref12">12</xref>). A filter bank with 23 filters was used and 13 coefficients were extracted. Utterances were downsampled to 8&#x2009;kHz before feature extraction. The first and second derivatives of MFCCs were also extracted.</p>
<p>MFCCs were extracted for each vocal task from every participant. For each participant, the mean values across the 21 tasks were calculated to streamline modeling. And prior research validates the consistent efficacy of vocal features across vocal tasks (<xref ref-type="bibr" rid="ref15">15</xref>, <xref ref-type="bibr" rid="ref35">35</xref>, <xref ref-type="bibr" rid="ref37">37</xref>, <xref ref-type="bibr" rid="ref38">38</xref>).</p>
</sec>
<sec id="sec12">
<title>I-vector extraction</title>
<p>The i-vector extraction formula is represented as follows:</p>
<disp-formula id="EQ1"><label>(1)</label><mml:math id="M1"><mml:mrow><mml:mi>M</mml:mi><mml:mo>=</mml:mo><mml:mi>m</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:math></disp-formula>
<p>where <italic>m</italic> is the mean super-vector of the Universal Background Model (UBM). UBM representing the feature distribution of the acoustic space, is adapted to a set of given speech frames to estimate utterance-dependent Gaussian Mixture Models parameters. <italic>M</italic> is the mean-centered super-vector of the speech utterance derived using the 0th and 1st order Baum&#x2013;Welch statistics. <italic>v</italic> is the i-vector, the representation of a speech utterance (<xref ref-type="bibr" rid="ref43">43</xref>).</p>
<p>Twenty i-vectors were derived for each participant. All i-vectors underwent quantile normalization. The training data was then used for constructing logistic regression models. i-vectors extraction was performed using the Kaldi toolkit (<xref ref-type="bibr" rid="ref44">44</xref>).</p>
</sec>
<sec id="sec13">
<title>Logistic regression</title>
<p>i-vectors were subjected to logistic regression on the training set using R (<xref ref-type="bibr" rid="ref41">41</xref>). Logistic regression employed the Glmnet method (<xref ref-type="bibr" rid="ref45">45</xref>) with ridge regularization and 5-fold cross validation. The resultant models were then applied to the test sets for each classification task.</p>
</sec>
<sec id="sec14">
<title>Model building</title>
<p>To evaluate the classification ability of voice features for differential diagnosis, logistic regression models were constructed for the seven binary classification tasks. First, we examined the classification ability of voice features in distinguishing between the healthy controls and those with any mental disorder. Second, we examined model performances in separately classifying the healthy controls and each discrete clinical group. Third, model performances were evaluated in pairwise classification among the three disorders (depression versus BD; depression versus schizophrenia; BD versus schizophrenia).</p>
</sec>
</sec>
<sec sec-type="results" id="sec15">
<title>Results</title>
<sec id="sec16">
<title>Descriptive statistics</title>
<p>Age differences were tested. As shown in <xref rid="tab3" ref-type="table">Table 3</xref>, cases and controls for all classification tasks were matched on relevant variables, except for the schizophrenia versus healthy control task. Propensity score matching (<xref ref-type="bibr" rid="ref46">46</xref>) was therefore conducted to re-match the schizophrenia and the healthy control group. The case group and control group for this task were then balanced, as detailed in <xref rid="tab4" ref-type="table">Table 4</xref>.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption><p><italic>t</italic>-tests of age difference for each classification task.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Tasks</th>
<th align="center" valign="top">Groups</th>
<th align="center" valign="top">M&#x2009;&#x00B1;&#x2009;SD</th>
<th align="center" valign="top"><italic>t</italic></th>
<th align="center" valign="top"><italic>p</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="2">AH<xref rid="tfn1" ref-type="table-fn"><sup>a</sup></xref></td>
<td align="left" valign="middle">Any disorder</td>
<td align="char" valign="middle" char="&#x00B1;">32.35 &#x00B1; 9.82</td>
<td align="char" valign="middle" char="." rowspan="2">&#x2212;1.42</td>
<td align="char" valign="middle" char="." rowspan="2">0.16</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="char" valign="middle" char="&#x00B1;">34.48 &#x00B1; 10.63</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DH</td>
<td align="left" valign="middle">Depression</td>
<td align="char" valign="middle" char="&#x00B1;">33.76 &#x00B1; 10.23</td>
<td align="char" valign="middle" char="." rowspan="2">&#x2212;0.4</td>
<td align="char" valign="middle" char="." rowspan="2">0.69</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="char" valign="middle" char="&#x00B1;">34.43 &#x00B1; 10.48</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">BH</td>
<td align="left" valign="middle">BD</td>
<td align="char" valign="middle" char="&#x00B1;">29.68 &#x00B1; 8.99</td>
<td align="char" valign="middle" char="." rowspan="2">&#x2212;1.85</td>
<td align="char" valign="middle" char="." rowspan="2">0.07</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="char" valign="middle" char="&#x00B1;">34.86 &#x00B1; 9.03</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">SH</td>
<td align="left" valign="middle">Schizophrenia</td>
<td align="char" valign="middle" char="&#x00B1;">29.07 &#x00B1; 7.42</td>
<td align="char" valign="middle" char="." rowspan="2">&#x2212;2.11</td>
<td align="char" valign="middle" char="." rowspan="2">0.03<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="char" valign="middle" char="&#x00B1;">37.14 &#x00B1; 7.42</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DB</td>
<td align="left" valign="middle">Depression</td>
<td align="char" valign="middle" char="&#x00B1;">34.18 &#x00B1; 9.67</td>
<td align="char" valign="middle" char="." rowspan="2">1.57</td>
<td align="char" valign="middle" char="." rowspan="2">0.12</td>
</tr>
<tr>
<td align="left" valign="middle">BD</td>
<td align="char" valign="middle" char="&#x00B1;">29.68 &#x00B1; 8.99</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DS</td>
<td align="left" valign="middle">Depression</td>
<td align="char" valign="middle" char="&#x00B1;">33.86 &#x00B1; 9.49</td>
<td align="char" valign="middle" char="." rowspan="2">1.45</td>
<td align="char" valign="middle" char="." rowspan="2">0.15</td>
</tr>
<tr>
<td align="left" valign="middle">Schizophrenia</td>
<td align="char" valign="middle" char="&#x00B1;">29.07 &#x00B1; 7.42</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">BS</td>
<td align="left" valign="middle">BD</td>
<td align="char" valign="middle" char="&#x00B1;">30.00 &#x00B1; 7.75</td>
<td align="char" valign="middle" char="." rowspan="2">0.33</td>
<td align="char" valign="middle" char="." rowspan="2">0.77</td>
</tr>
<tr>
<td align="left" valign="middle">Schizophrenia</td>
<td align="char" valign="middle" char="&#x00B1;">29.07 &#x00B1; 7.42</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="tfn1"><label>a</label><p>AH, any disorder versus healthy controls; DH, depression versus healthy controls; BH, BD versus healthy controls; SH, schizophrenia versus healthy controls; DB, depression versus BD; DS, depression versus schizophrenia; BS, BD versus schizophrenia. Similarly hereinafter.</p></fn>
<p><sup>&#x002A;</sup><italic>p</italic>&#x2009;&#x003C;&#x2009;0.05.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption><p><italic>t</italic>-test after propensity score matching for SH task.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Tasks</th>
<th align="left" valign="top">Groups</th>
<th align="center" valign="top">M&#x2009;&#x00B1;&#x2009;SD</th>
<th align="center" valign="top"><italic>t</italic></th>
<th align="center" valign="top"><italic>p</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="2">SH</td>
<td align="left" valign="middle">Schizophrenia</td>
<td align="char" valign="middle" char="&#x00B1;">28.79 &#x00B1; 8.15</td>
<td align="char" valign="middle" char="." rowspan="2">&#x2212;0.74</td>
<td align="char" valign="middle" char="." rowspan="2">0.48</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="char" valign="middle" char="&#x00B1;">31.14 &#x00B1; 8.74</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Permutation tests were conducted for both gender and education. No significant differences were found in either gender or education for any of the tasks. See <xref rid="tab5" ref-type="table">Tables 5</xref>, <xref rid="tab6" ref-type="table">6</xref>.</p>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption><p>Permutation tests on gender for each classification task.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Tasks</th>
<th align="center" valign="top" rowspan="2">Groups</th>
<th align="center" valign="top" colspan="2">Gender</th>
<th align="center" valign="top" rowspan="2"><italic>&#x03F0;</italic><sup>2</sup></th>
<th align="center" valign="top" rowspan="2"><italic>p</italic></th>
</tr>
<tr>
<th align="center" valign="top">Male</th>
<th align="center" valign="top">Female</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="2">AH</td>
<td align="left" valign="middle">Any disorder</td>
<td align="center" valign="middle">55</td>
<td align="center" valign="middle">55</td>
<td align="center" valign="middle" rowspan="2">0</td>
<td align="center" valign="middle" rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="center" valign="middle">40</td>
<td align="center" valign="middle">40</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DH</td>
<td align="left" valign="middle">Depression</td>
<td align="center" valign="middle">37</td>
<td align="center" valign="middle">37</td>
<td align="center" valign="middle" rowspan="2">0</td>
<td align="center" valign="middle" rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="center" valign="middle">37</td>
<td align="center" valign="middle">37</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">BH</td>
<td align="left" valign="middle">BD</td>
<td align="center" valign="middle">11</td>
<td align="center" valign="middle">11</td>
<td align="center" valign="middle" rowspan="2">0</td>
<td align="center" valign="middle" rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="center" valign="middle">11</td>
<td align="center" valign="middle">11</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">SH</td>
<td align="left" valign="middle">Schizophrenia</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle" rowspan="2">0</td>
<td align="center" valign="middle" rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle">7</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DB</td>
<td align="left" valign="middle">Depression</td>
<td align="center" valign="middle">11</td>
<td align="center" valign="middle">11</td>
<td align="center" valign="middle" rowspan="2">0</td>
<td align="center" valign="middle" rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">BD</td>
<td align="center" valign="middle">11</td>
<td align="center" valign="middle">11</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DS</td>
<td align="left" valign="middle">Depression</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle" rowspan="2">0</td>
<td align="center" valign="middle" rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">Schizophrenia</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle">7</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">BS</td>
<td align="left" valign="middle">BD</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle" rowspan="2">0</td>
<td align="center" valign="middle" rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">Schizophrenia</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle">7</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="tab6">
<label>Table 6</label>
<caption><p>Permutation tests on education for each classification task.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Tasks</th>
<th align="center" valign="top" rowspan="2">Groups</th>
<th align="center" valign="top" colspan="2">Education</th>
<th align="center" valign="top" rowspan="2"><italic>&#x03F0;</italic><sup>2</sup></th>
<th align="center" valign="top" rowspan="2"><italic>p</italic></th>
</tr>
<tr>
<th align="center" valign="top">High school level and below</th>
<th align="center" valign="top">Undergraduate and above</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="2">AH</td>
<td align="left" valign="middle">Any disorder</td>
<td align="center" valign="middle">46</td>
<td align="center" valign="middle">64</td>
<td align="char" valign="middle" char="." rowspan="2">0.18</td>
<td align="char" valign="middle" char="." rowspan="2">0.76</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="center" valign="middle">31</td>
<td align="center" valign="middle">49</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DH</td>
<td align="left" valign="middle">Depression</td>
<td align="center" valign="middle">29</td>
<td align="center" valign="middle">45</td>
<td align="char" valign="middle" char="." rowspan="2">0.03</td>
<td align="char" valign="middle" char="." rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="center" valign="middle">28</td>
<td align="center" valign="middle">46</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">BH</td>
<td align="left" valign="middle">BD</td>
<td align="center" valign="middle">9</td>
<td align="center" valign="middle">13</td>
<td align="char" valign="middle" char="." rowspan="2">0</td>
<td align="char" valign="middle" char="." rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="center" valign="middle">9</td>
<td align="center" valign="middle">13</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">SH</td>
<td align="left" valign="middle">Schizophrenia</td>
<td align="center" valign="middle">6</td>
<td align="center" valign="middle">8</td>
<td align="char" valign="middle" char="." rowspan="2">1.29</td>
<td align="char" valign="middle" char="." rowspan="2">0.45</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="center" valign="middle">9</td>
<td align="center" valign="middle">5</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DB</td>
<td align="left" valign="middle">Depression</td>
<td align="center" valign="middle">10</td>
<td align="center" valign="middle">12</td>
<td align="char" valign="middle" char="." rowspan="2">0.09</td>
<td align="char" valign="middle" char="." rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">BD</td>
<td align="center" valign="middle">9</td>
<td align="center" valign="middle">13</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DS</td>
<td align="left" valign="middle">Depression</td>
<td align="center" valign="middle">5</td>
<td align="center" valign="middle">9</td>
<td align="char" valign="middle" char="." rowspan="2">1.29</td>
<td align="char" valign="middle" char="." rowspan="2">0.45</td>
</tr>
<tr>
<td align="left" valign="middle">Schizophrenia</td>
<td align="center" valign="middle">8</td>
<td align="center" valign="middle">6</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">BS</td>
<td align="left" valign="middle">BD</td>
<td align="center" valign="middle">7</td>
<td align="center" valign="middle">7</td>
<td align="char" valign="middle" char="." rowspan="2">0.14</td>
<td align="char" valign="middle" char="." rowspan="2">1</td>
</tr>
<tr>
<td align="left" valign="middle">Schizophrenia</td>
<td align="center" valign="middle">8</td>
<td align="center" valign="middle">6</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Following matching, duration differences between groups were evaluated for each classification task. The mean value of 21 recording durations was first computed for each participant across all experimental tasks. Difference tests were then conducted between groups for each classification task. Results showed that there were no significant differences between groups for any classification task. See <xref rid="tab7" ref-type="table">Table 7</xref>.</p>
<table-wrap position="float" id="tab7">
<label>Table 7</label>
<caption><p>Duration differences between groups for each classification task.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Tasks</th>
<th align="center" valign="top">Groups</th>
<th align="center" valign="top">M&#x2009;&#x00B1;&#x2009;SD</th>
<th align="center" valign="top"><italic>t</italic></th>
<th align="center" valign="top"><italic>p</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="2">AH</td>
<td align="left" valign="middle">Any disorder</td>
<td align="char" valign="middle" char="&#x00B1;">30.47 &#x00B1; 8.84</td>
<td align="char" valign="middle" char="." rowspan="2">1.53</td>
<td align="char" valign="middle" char="." rowspan="2">0.13</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="char" valign="middle" char="&#x00B1;">28.65 &#x00B1; 7.44</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DH</td>
<td align="left" valign="middle">Depression</td>
<td align="char" valign="middle" char="&#x00B1;">30.62 &#x00B1; 7.80</td>
<td align="char" valign="middle" char="." rowspan="2">1.59</td>
<td align="char" valign="middle" char="." rowspan="2">0.12</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="char" valign="middle" char="&#x00B1;">28.22 &#x00B1; 10.36</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">BH</td>
<td align="left" valign="middle">BD</td>
<td align="char" valign="middle" char="&#x00B1;">30.09 &#x00B1; 9.93</td>
<td align="char" valign="middle" char="." rowspan="2">0.63</td>
<td align="char" valign="middle" char="." rowspan="2">0.53</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="char" valign="middle" char="&#x00B1;">28.42 &#x00B1; 7.81</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">SH</td>
<td align="left" valign="middle">Schizophrenia</td>
<td align="char" valign="middle" char="&#x00B1;">29.22 &#x00B1; 6.66</td>
<td align="char" valign="middle" char="." rowspan="2">0.14</td>
<td align="char" valign="middle" char="." rowspan="2">0.89</td>
</tr>
<tr>
<td align="left" valign="middle">Healthy controls</td>
<td align="char" valign="middle" char="&#x00B1;">28.82 &#x00B1; 8.65</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DB</td>
<td align="left" valign="middle">Depression</td>
<td align="char" valign="middle" char="&#x00B1;">30.94 &#x00B1; 7.68</td>
<td align="char" valign="middle" char="." rowspan="2">0.57</td>
<td align="char" valign="middle" char="." rowspan="2">0.57</td>
</tr>
<tr>
<td align="left" valign="middle">BD</td>
<td align="char" valign="middle" char="&#x00B1;">29.58 &#x00B1; 8.17</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">DS</td>
<td align="left" valign="middle">Depression</td>
<td align="char" valign="middle" char="&#x00B1;">31.21 &#x00B1; 7.22</td>
<td align="char" valign="middle" char="." rowspan="2">0.77</td>
<td align="char" valign="middle" char="." rowspan="2">0.45</td>
</tr>
<tr>
<td align="left" valign="middle">Schizophrenia</td>
<td align="char" valign="middle" char="&#x00B1;">29.17 &#x00B1; 6.60</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">BS</td>
<td align="left" valign="middle">BD</td>
<td align="char" valign="middle" char="&#x00B1;">29.54 &#x00B1; 10.01</td>
<td align="char" valign="middle" char="." rowspan="2">0.42</td>
<td align="char" valign="middle" char="." rowspan="2">0.68</td>
</tr>
<tr>
<td align="left" valign="middle">Schizophrenia</td>
<td align="char" valign="middle" char="&#x00B1;">28.22 &#x00B1; 5.59</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec17">
<title>Classification</title>
<p>Model performance metrics for the different tasks are summarized in <xref rid="tab8" ref-type="table">Table 8</xref>. For the general ability of vocal features to classify healthy versus any clinical group (AH task), the <italic>F</italic>-<italic>score</italic> was 0.82, AUC (area under curve) was 0.79. In distinguishing specific disorders from healthy controls, results showed: for DH task, <italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.78, AUC&#x2009;=&#x2009;0.77; for BH task, <italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.80, AUC&#x2009;=&#x2009;0.80; for SH task, <italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.73, AUC&#x2009;=&#x2009;0.75. To further examine the ability of voice features on pairwise classifications among the three mental disorders, DB, DS and BS tasks were performed. Results showed: for DB task, <italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.44, AUC&#x2009;=&#x2009;0.50; for DS task, <italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.83, AUC&#x2009;=&#x2009;0.83; for BS task, <italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.91, AUC&#x2009;=&#x2009;0.92.</p>
<table-wrap position="float" id="tab8">
<label>Table 8</label>
<caption><p>Results on 7 classification tasks with the i-vector framework.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Tasks</th>
<th align="center" valign="top">Sensitivity</th>
<th align="center" valign="top">Specificity</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">Precision</th>
<th align="center" valign="top">Recall</th>
<th align="center" valign="top"><italic>F</italic>-score</th>
<th align="center" valign="top">AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">AH</td>
<td align="char" valign="middle" char=".">0.81</td>
<td align="char" valign="middle" char=".">0.76</td>
<td align="char" valign="middle" char=".">0.79</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">0.81</td>
<td align="char" valign="middle" char=".">0.82</td>
<td align="char" valign="middle" char=".">0.79</td>
</tr>
<tr>
<td align="left" valign="middle">DH</td>
<td align="char" valign="middle" char=".">0.81</td>
<td align="char" valign="middle" char=".">0.72</td>
<td align="char" valign="middle" char=".">0.77</td>
<td align="char" valign="middle" char=".">0.74</td>
<td align="char" valign="middle" char=".">0.81</td>
<td align="char" valign="middle" char=".">0.78</td>
<td align="char" valign="middle" char=".">0.77</td>
</tr>
<tr>
<td align="left" valign="middle">BH</td>
<td align="char" valign="middle" char=".">0.80</td>
<td align="char" valign="middle" char=".">0.80</td>
<td align="char" valign="middle" char=".">0.80</td>
<td align="char" valign="middle" char=".">0.80</td>
<td align="char" valign="middle" char=".">0.80</td>
<td align="char" valign="middle" char=".">0.80</td>
<td align="char" valign="middle" char=".">0.80</td>
</tr>
<tr>
<td align="left" valign="middle">SH</td>
<td align="char" valign="middle" char=".">0.67</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">0.75</td>
<td align="char" valign="middle" char=".">0.80</td>
<td align="char" valign="middle" char=".">0.67</td>
<td align="char" valign="middle" char=".">0.73</td>
<td align="char" valign="middle" char=".">0.75</td>
</tr>
<tr>
<td align="left" valign="middle">DB</td>
<td align="char" valign="middle" char=".">0.40</td>
<td align="char" valign="middle" char=".">0.60</td>
<td align="char" valign="middle" char=".">0.50</td>
<td align="char" valign="middle" char=".">0.50</td>
<td align="char" valign="middle" char=".">0.40</td>
<td align="char" valign="middle" char=".">0.44</td>
<td align="char" valign="middle" char=".">0.50</td>
</tr>
<tr>
<td align="left" valign="middle">DS</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">0.83</td>
</tr>
<tr>
<td align="left" valign="middle">BS</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">1.00</td>
<td align="char" valign="middle" char=".">0.92</td>
<td align="char" valign="middle" char=".">1.00</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">0.91</td>
<td align="char" valign="middle" char=".">0.92</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>We also compared model performances for all classification tasks. Following Bonferroni correction (<italic>n</italic>&#x2009;=&#x2009;6), the only significant difference was that the depression versus BD model (DB task) showed worse performance (lower AUC) than the BD versus schizophrenia model (BS task; <italic>p</italic>&#x2009;&#x003C;&#x2009;0.05), as detailed in <xref rid="tab9" ref-type="table">Table 9</xref>.</p>
<table-wrap position="float" id="tab9">
<label>Table 9</label>
<caption><p>Pairwise comparisons of model performances for the 7 classification tasks.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Model comparisons</th>
<th align="center" valign="top">DeLong&#x2019;s test</th>
<th align="center" valign="top">df</th>
<th align="center" valign="top"><italic>p</italic></th>
<th align="center" valign="top">Corrected <italic>p</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">AH_DH</td>
<td align="char" valign="middle" char=".">0.32</td>
<td align="char" valign="middle" char=".">134.67</td>
<td align="char" valign="middle" char=".">0.75</td>
<td align="char" valign="middle" char=".">4.50</td>
</tr>
<tr>
<td align="left" valign="middle">AH_BH</td>
<td align="char" valign="middle" char=".">&#x2212;0.11</td>
<td align="char" valign="middle" char=".">29.02</td>
<td align="char" valign="middle" char=".">0.91</td>
<td align="char" valign="middle" char=".">5.46</td>
</tr>
<tr>
<td align="left" valign="middle">AH_SH</td>
<td align="char" valign="middle" char=".">0.27</td>
<td align="char" valign="middle" char=".">13.78</td>
<td align="char" valign="middle" char=".">0.79</td>
<td align="char" valign="middle" char=".">4.74</td>
</tr>
<tr>
<td align="left" valign="middle">AH_DB</td>
<td align="char" valign="middle" char=".">2.32</td>
<td align="char" valign="middle" char=".">25.54</td>
<td align="char" valign="middle" char=".">0.03</td>
<td align="char" valign="middle" char=".">0.18</td>
</tr>
<tr>
<td align="left" valign="middle">AH_DS</td>
<td align="char" valign="middle" char=".">&#x2212;0.35</td>
<td align="char" valign="middle" char=".">14.66</td>
<td align="char" valign="middle" char=".">0.73</td>
<td align="char" valign="middle" char=".">4.38</td>
</tr>
<tr>
<td align="left" valign="middle">AH_BS</td>
<td align="char" valign="middle" char=".">&#x2212;1.34</td>
<td align="char" valign="middle" char=".">18.71</td>
<td align="char" valign="middle" char=".">0.20</td>
<td align="char" valign="middle" char=".">1.20</td>
</tr>
<tr>
<td align="left" valign="middle">DH_BH</td>
<td align="char" valign="middle" char=".">&#x2212;0.32</td>
<td align="char" valign="middle" char=".">32.18</td>
<td align="char" valign="middle" char=".">0.75</td>
<td align="char" valign="middle" char=".">4.50</td>
</tr>
<tr>
<td align="left" valign="middle">DH_SH</td>
<td align="char" valign="middle" char=".">0.11</td>
<td align="char" valign="middle" char=".">14.70</td>
<td align="char" valign="middle" char=".">0.92</td>
<td align="char" valign="middle" char=".">5.52</td>
</tr>
<tr>
<td align="left" valign="middle">DH_DB</td>
<td align="char" valign="middle" char=".">2.09</td>
<td align="char" valign="middle" char=".">27.64</td>
<td align="char" valign="middle" char=".">0.05</td>
<td align="char" valign="middle" char=".">0.28</td>
</tr>
<tr>
<td align="left" valign="middle">DH_DS</td>
<td align="char" valign="middle" char=".">&#x2212;0.52</td>
<td align="char" valign="middle" char=".">15.88</td>
<td align="char" valign="middle" char=".">0.61</td>
<td align="char" valign="middle" char=".">3.66</td>
</tr>
<tr>
<td align="left" valign="middle">DH_BS</td>
<td align="char" valign="middle" char=".">&#x2212;1.53</td>
<td align="char" valign="middle" char=".">21.29</td>
<td align="char" valign="middle" char=".">0.14</td>
<td align="char" valign="middle" char=".">0.84</td>
</tr>
<tr>
<td align="left" valign="middle">BH_SH</td>
<td align="char" valign="middle" char=".">0.30</td>
<td align="char" valign="middle" char=".">21.48</td>
<td align="char" valign="middle" char=".">0.76</td>
<td align="char" valign="middle" char=".">4.56</td>
</tr>
<tr>
<td align="left" valign="middle">BH_DB</td>
<td align="char" valign="middle" char=".">2.01</td>
<td align="char" valign="middle" char=".">36.64</td>
<td align="char" valign="middle" char=".">0.05</td>
<td align="char" valign="middle" char=".">0.31</td>
</tr>
<tr>
<td align="left" valign="middle">BH_DS</td>
<td align="char" valign="middle" char=".">&#x2212;0.22</td>
<td align="char" valign="middle" char=".">23.92</td>
<td align="char" valign="middle" char=".">0.83</td>
<td align="char" valign="middle" char=".">4.98</td>
</tr>
<tr>
<td align="left" valign="middle">BH_BS</td>
<td align="char" valign="middle" char=".">&#x2212;0.93</td>
<td align="char" valign="middle" char=".">29.35</td>
<td align="char" valign="middle" char=".">0.36</td>
<td align="char" valign="middle" char=".">2.16</td>
</tr>
<tr>
<td align="left" valign="middle">SH_DB</td>
<td align="char" valign="middle" char=".">1.41</td>
<td align="char" valign="middle" char=".">25.27</td>
<td align="char" valign="middle" char=".">0.17</td>
<td align="char" valign="middle" char=".">1.02</td>
</tr>
<tr>
<td align="left" valign="middle">SH_DS</td>
<td align="char" valign="middle" char=".">&#x2212;0.47</td>
<td align="char" valign="middle" char=".">21.63</td>
<td align="char" valign="middle" char=".">0.65</td>
<td align="char" valign="middle" char=".">3.90</td>
</tr>
<tr>
<td align="left" valign="middle">SH_BS</td>
<td align="char" valign="middle" char=".">&#x2212;1.05</td>
<td align="char" valign="middle" char=".">18.37</td>
<td align="char" valign="middle" char=".">0.31</td>
<td align="char" valign="middle" char=".">1.86</td>
</tr>
<tr>
<td align="left" valign="middle">DB_DS</td>
<td align="char" valign="middle" char=".">&#x2212;2.02</td>
<td align="char" valign="middle" char=".">27.56</td>
<td align="char" valign="middle" char=".">0.05</td>
<td align="char" valign="middle" char=".">0.32</td>
</tr>
<tr>
<td align="left" valign="middle">DB_BS</td>
<td align="char" valign="middle" char=".">&#x2212;2.93</td>
<td align="char" valign="middle" char=".">29.93</td>
<td align="char" valign="middle" char=".">0.01</td>
<td align="char" valign="middle" char=".">0.04<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="left" valign="middle">DS_BS</td>
<td align="char" valign="middle" char=".">&#x2212;0.58</td>
<td align="char" valign="middle" char=".">19.80</td>
<td align="char" valign="middle" char=".">0.57</td>
<td align="char" valign="middle" char=".">3.42</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>&#x002A;<italic>p</italic>&#x2009;&#x003C;&#x2009;0.05.</p>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec sec-type="discussions" id="sec18">
<title>Discussion</title>
<p>In this research, we investigated the ability of vocal biomarkers to classify various health conditions. Following matching, descriptive statistics showed no differences in demographic variables between the case and control groups for any task, addressing potential threats to validity (<xref ref-type="bibr" rid="ref15">15</xref>). Logistic regression models based on MFCC-derived i-vectors were developed for all classification tasks. Results indicated vocal features may assist differential diagnosis of depression, albeit with varying degrees of effectiveness across classification tasks.</p>
<p>The AH model examined the baseline ability of vocal features to distinguish clinical from healthy groups. Classifying the healthy group and the any-disorder group yielded an <italic>F</italic>-<italic>score</italic> of 0.82 and AUC of 0.79, demonstrating that vocal features can distinguish mental disorders from healthy individuals.</p>
<p>Furthermore, the DH, BH, and SH models investigated the ability of voice features in distinguishing specific disorders from health. For DH, BH, and SH tasks, <italic>F</italic>-<italic>scores</italic> ranged from 0.73 to 0.80, AUC scores from 0.75 to 0.80. Model comparisons showed there were no significant differences in the pairwise comparisons among the AH and the three mental illnesses versus healthy control classification tasks. Our results about DH are consistent with the existing findings (<xref ref-type="bibr" rid="ref8">8</xref>, <xref ref-type="bibr" rid="ref12">12</xref>, <xref ref-type="bibr" rid="ref15">15</xref>, <xref ref-type="bibr" rid="ref47">47</xref>&#x2013;<xref ref-type="bibr" rid="ref50">50</xref>). For example, a previous study examined the significance of the association between voice features and depression using binary logistic regression, and the actual classification ability of voice features on depression using machine learning method, results showed that the contribution effect of voice features reached 35.65% (Nagelkerke&#x2019;s <italic>R</italic><sup>2</sup>), further classification model achieved 81% of <italic>F</italic>-measure (<xref ref-type="bibr" rid="ref15">15</xref>). We set this classification model as another baseline for the reference of further classification. The results of BH and SH classification models are also consistent with existing studies. For instance, one study investigated whether voice features from naturalistic phone calls could discriminate between BD and healthy control individuals, results showed that compared to the control group, BD was classified with a 0.79 <italic>sensitivity</italic> and 0.76 AUC (<xref ref-type="bibr" rid="ref51">51</xref>). For schizophrenia detection, Tahir et al. (<xref ref-type="bibr" rid="ref52">52</xref>) classified schizophrenia patients and healthy controls with multilayer perceptron and the accuracy achieved 81.3%. 
It was also suggested that speech abnormalities, related to flat affect and alogia, are a hallmark feature of schizophrenia, and are often associated with core negative symptoms and social impairment (<xref ref-type="bibr" rid="ref7">7</xref>). Our results indicate that MFCCs capture information distinguishing depression, BD and schizophrenia from health.</p>
<p>The DB, DS, and BS models further evaluated the performances of voice features on pairwise classifications among the three mental disorders. DS (<italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.83; AUC&#x2009;=&#x2009;0.83) and BS (<italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.91; AUC&#x2009;=&#x2009;0.92) models showed promise. The BS model had the highest <italic>F</italic>-<italic>score</italic> and AUC across all seven models. However, the DB model performed the worst (<italic>F</italic>-<italic>score</italic>&#x2009;=&#x2009;0.44), with an AUC score of 0.50 indicating voice features barely distinguished depression and BD. Further pairwise model comparisons showed no significant differences among the AH, DH, BH, SH, DS, and BS models. However, DB model performance was significantly worse than that of BS. Results indicate that voice features could help distinguish depression from disorders with similar mood symptoms.</p>
<p>Differing symptoms and vocal characteristics likely explain the results of differential diagnosis. For example, depression is associated with monotonous, hoarse, breathy or slurred speech reflecting anhedonia and sadness (<xref ref-type="bibr" rid="ref53">53</xref>&#x2013;<xref ref-type="bibr" rid="ref58">58</xref>). Schizophrenia is linked to poverty of speech, increased pauses, distinctive tone/intensity associated with core negative symptoms like flat affect, decreased emotional expression and difficulty controlling speech to express emotion properly (<xref ref-type="bibr" rid="ref59">59</xref>&#x2013;<xref ref-type="bibr" rid="ref65">65</xref>). Given that Espinola et al. (<xref ref-type="bibr" rid="ref25">25</xref>) found vocal features distinguishing major depressive disorder, schizophrenia and other disorders, our MFCC derived i-vector approach may reveal subtle differences in anhedonic/sad depression symptoms versus alogia/flat affect in schizophrenia. However, further voice difference investigation between depression and schizophrenia is still needed to offer more explanation.</p>
<p>Another possible reason is that the i-vectors catch disorder-relevant information. To extract i-vectors, first the method learns shared case/control information, then removes shared components, yielding i-vectors capturing key individual differences (<xref ref-type="bibr" rid="ref29">29</xref>, <xref ref-type="bibr" rid="ref43">43</xref>). Here, i-vectors captured distinct vocal information for different mental disorders.</p>
<p>Overall, vocal features could provide clinical value in distinguishing depression from selected disorders, and model improvement is helpful, especially for closely related conditions. This analysis establishes a foundation for future studies exploring vocal biomarkers for differential diagnosis of psychiatric disorders.</p>
<p>This research was unable to clearly differentiate between depression and BD. There appears to be substantial symptom overlap between these two conditions. Furthermore, the oscillation between manic and depressive states in BD can be subtle, as demonstrated by Grande et al. (<xref ref-type="bibr" rid="ref66">66</xref>). Researchers have posited that there might be continued presence of subsyndromal residual symptoms during recovery from major affective episodes in bipolar disorder, and residual symptoms after resolution of a major affective episode indicate that the individual is at significant risk for a rapid relapse and/or recurrence, augmenting the intricacy of symptom presentation even during euthymia (<xref ref-type="bibr" rid="ref67">67</xref>). Depressive symptoms may have been present in euthymic bipolar patients in this study, confounding diagnostic classification. This observation highlights the complexity of the euthymic bipolar condition. Future research should examine symptom and voice differences between BD phases and depression. A nuanced understanding of markers that distinguish unipolar depression from BD could sharpen diagnostic precision.</p>
<p>This study has limitations. We examined only depression, BD and schizophrenia in a small cultural sample, limiting generalizability. It is worth noting that this study lacks an anxiety disorder group, which also exhibits affective symptoms similar to depression. As the data came from a previous project in China, anxiety diagnoses were not included. Future work will gather systematic data on symptoms, clinical phase, psychological factors like emotion, cognition, and severity in these and other disorders, such as anxiety. This could yield insights into voice differences between depression and other conditions, enhancing the clinical value of vocal biomarkers.</p>
</sec>
<sec sec-type="conclusions" id="sec19">
<title>Conclusion</title>
<p>This research systematically explored the ability of vocal biomarkers to distinguish depression from disorders with similar affective symptoms. Findings suggest vocal features could aid differential diagnosis for depression in clinical practice. Future research should investigate mechanisms by which vocal features differentiate depression and other disorders, and develop more advanced machine learning models so voice can enhance clinical diagnosis.</p>
</sec>
<sec sec-type="data-availability" id="sec20">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="sec21">
<title>Ethics statement</title>
<p>The studies involving human participants were reviewed and approved by the Institutional Review Board (IRB) of Institute of Psychology, Chinese Academy of Sciences. The patients/participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="sec22">
<title>Author contributions</title>
<p>WP was in charge of overall research, data analysis, and drafting the paper. FD was in charge of offering professional guidance and advice for mental illnesses as a clinical specialist. XW helped on data analysis. BH and WZ helped with the analysis and revising the manuscript. TZ was in charge of collecting and organizing the data. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec sec-type="funding-information" id="sec23">
<title>Funding</title>
<p>This work was supported by the Fundamental Research Funds for the Central Universities (CCNU21XJ021), Knowledge Innovation Program of Wuhan-Shuguang Project (2022020801020288), and the Research Program Funds of the Collaborative Innovation Center of Assessment toward Basic Education Quality (2022-04-030-BZPK01).</p>
</sec>
<sec sec-type="COI-statement" id="sec24">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="sec100" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bagby</surname> <given-names>RM</given-names></name> <name><surname>Ryder</surname> <given-names>AG</given-names></name> <name><surname>Schuller</surname> <given-names>DR</given-names></name> <name><surname>Marshall</surname> <given-names>MB</given-names></name></person-group>. <article-title>The Hamilton depression rating scale: has the gold standard become a lead weight?</article-title> <source>Am J Psychiatr</source>. (<year>2004</year>) <volume>161</volume>:<fpage>2163</fpage>&#x2013;<lpage>77</lpage>. doi: <pub-id pub-id-type="doi">10.1176/appi.ajp.161.12.2163</pub-id>, PMID: <pub-id pub-id-type="pmid">15569884</pub-id></citation></ref>
<ref id="ref2"><label>2.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fried</surname> <given-names>EI</given-names></name></person-group>. <article-title>The 52 symptoms of major depression: lack of content overlap among seven common depression scales</article-title>. <source>J Affect Disord</source>. (<year>2017</year>) <volume>208</volume>:<fpage>191</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jad.2016.10.019</pub-id>, PMID: <pub-id pub-id-type="pmid">27792962</pub-id></citation></ref>
<ref id="ref3"><label>3.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kendler</surname> <given-names>KS</given-names></name> <name><surname>Aggen</surname> <given-names>SH</given-names></name> <name><surname>Flint</surname> <given-names>J</given-names></name> <name><surname>Borsboom</surname> <given-names>D</given-names></name> <name><surname>Fried</surname> <given-names>EI</given-names></name></person-group>. <article-title>The centrality of DSM and non-DSM depressive symptoms in Han Chinese women with major depression</article-title>. <source>J Affect Disord</source>. (<year>2018</year>) <volume>227</volume>:<fpage>739</fpage>&#x2013;<lpage>44</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jad.2017.11.032</pub-id>, PMID: <pub-id pub-id-type="pmid">29179144</pub-id></citation></ref>
<ref id="ref4"><label>4.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>YK</given-names></name> <name><surname>Park</surname> <given-names>SC</given-names></name></person-group>. <article-title>An alternative approach to future diagnostic standards for major depressive disorder</article-title>. <source>Prog Neuro-Psychopharmacol Biol Psychiatry</source>. (<year>2020</year>) <volume>2020</volume>:<fpage>110133</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.pnpbp.2020.110133</pub-id></citation></ref>
<ref id="ref5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mitchell</surname> <given-names>AJ</given-names></name> <name><surname>Vaze</surname> <given-names>A</given-names></name> <name><surname>Rao</surname> <given-names>S</given-names></name></person-group>. <article-title>Clinical diagnosis of depression in primary care: a meta-analysis</article-title>. <source>Lancet</source>. (<year>2009</year>) <volume>374</volume>:<fpage>609</fpage>&#x2013;<lpage>19</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0140-6736(09)60879-5</pub-id>, PMID: <pub-id pub-id-type="pmid">19640579</pub-id></citation></ref>
<ref id="ref6"><label>6.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>X</given-names></name> <name><surname>Faes</surname> <given-names>L</given-names></name> <name><surname>Kale</surname> <given-names>AU</given-names></name> <name><surname>Wagner</surname> <given-names>SK</given-names></name> <name><surname>Fu</surname> <given-names>DJ</given-names></name> <name><surname>Bruynseels</surname> <given-names>A</given-names></name> <etal/></person-group>. <article-title>A comparison of deep learning performance against health-care professionals in detecting diseases from medical imaging: a systematic review and meta-analysis</article-title>. <source>Lancet Digit Health</source>. (<year>2019</year>) <volume>1</volume>:<fpage>e271</fpage>&#x2013;<lpage>97</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2589-7500(19)30123-2</pub-id>, PMID: <pub-id pub-id-type="pmid">33323251</pub-id></citation></ref>
<ref id="ref7"><label>7.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Parola</surname> <given-names>A</given-names></name> <name><surname>Simonsen</surname> <given-names>A</given-names></name> <name><surname>Bliksted</surname> <given-names>V</given-names></name> <name><surname>Fusaroli</surname> <given-names>R</given-names></name></person-group>. <article-title>Voice patterns in schizophrenia: a systematic review and Bayesian meta-analysis</article-title>. <source>Schizophr Res</source>. (<year>2020</year>) <volume>216</volume>:<fpage>24</fpage>&#x2013;<lpage>40</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.schres.2019.11.031</pub-id>, PMID: <pub-id pub-id-type="pmid">31839552</pub-id></citation></ref>
<ref id="ref8"><label>8.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cummins</surname> <given-names>N</given-names></name> <name><surname>Scherer</surname> <given-names>S</given-names></name> <name><surname>Krajewski</surname> <given-names>J</given-names></name> <name><surname>Schnieder</surname> <given-names>S</given-names></name> <name><surname>Epps</surname> <given-names>J</given-names></name> <name><surname>Quatieri</surname> <given-names>TF</given-names></name></person-group>. <article-title>A review of depression and suicide risk assessment using speech analysis</article-title>. <source>Speech Comm</source>. (<year>2015</year>) <volume>71</volume>:<fpage>10</fpage>&#x2013;<lpage>49</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.specom.2015.03.004</pub-id>, PMID: <pub-id pub-id-type="pmid">27409075</pub-id></citation></ref>
<ref id="ref9"><label>9.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Faurholt-Jepsen</surname> <given-names>M</given-names></name> <name><surname>Busk</surname> <given-names>J</given-names></name> <name><surname>Frost</surname> <given-names>M</given-names></name> <name><surname>Vinberg</surname> <given-names>M</given-names></name> <name><surname>Christensen</surname> <given-names>EM</given-names></name> <name><surname>Winther</surname> <given-names>O</given-names></name> <etal/></person-group>. <article-title>Voice analysis as an objective state marker in bipolar disorder</article-title>. <source>Transl Psychiatry</source>. (<year>2016</year>) <volume>6</volume>:<fpage>e856</fpage>. doi: <pub-id pub-id-type="doi">10.1038/tp.2016.123</pub-id></citation></ref>
<ref id="ref10"><label>10.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Maxhuni</surname> <given-names>A</given-names></name> <name><surname>Mu&#x00F1;oz-Mel&#x00E9;ndez</surname> <given-names>A</given-names></name> <name><surname>Osmani</surname> <given-names>V</given-names></name> <name><surname>Perez</surname> <given-names>H</given-names></name> <name><surname>Mayora</surname> <given-names>O</given-names></name> <name><surname>Morales</surname> <given-names>EF</given-names></name></person-group>. <article-title>Classification of bipolar disorder episodes based on analysis of voice and motor activity of patients</article-title>. <source>Pervasive Mobile Comput</source>. (<year>2016</year>) <volume>4</volume>:<fpage>841</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.pmcj.2016.01.008</pub-id></citation></ref>
<ref id="ref11"><label>11.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rapcan</surname> <given-names>V</given-names></name> <name><surname>D&#x2019;Arcy</surname> <given-names>S</given-names></name> <name><surname>Yeap</surname> <given-names>S</given-names></name> <name><surname>Afzal</surname> <given-names>N</given-names></name> <name><surname>Thakore</surname> <given-names>J</given-names></name> <name><surname>Reilly</surname> <given-names>RB</given-names></name></person-group>. <article-title>Acoustic and temporal analysis of speech: a potential biomarker for schizophrenia</article-title>. <source>Med Eng Phys</source>. (<year>2010</year>) <volume>32</volume>:<fpage>1074</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.medengphy.2010.07.013</pub-id>, PMID: <pub-id pub-id-type="pmid">20692864</pub-id></citation></ref>
<ref id="ref12"><label>12.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Afshan</surname> <given-names>A</given-names></name> <name><surname>Guo</surname> <given-names>J</given-names></name> <name><surname>Park</surname> <given-names>SJ</given-names></name> <name><surname>Ravi</surname> <given-names>V</given-names></name> <name><surname>Flint</surname> <given-names>J</given-names></name> <name><surname>Alwan</surname> <given-names>A</given-names></name></person-group>. <article-title>Effectiveness of voice quality features in detecting depression</article-title>. <source>Interspeech</source>. (<year>2018</year>) <volume>2018</volume>:<fpage>1676</fpage>&#x2013;<lpage>80</lpage> doi: <pub-id pub-id-type="doi">10.21437/Interspeech.2018-1399</pub-id></citation></ref>
<ref id="ref13"><label>13.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>L</given-names></name> <name><surname>Cao</surname> <given-names>C</given-names></name></person-group>. <article-title>Automated depression analysis using convolutional neural networks from speech</article-title>. <source>J Biomed Inform</source>. (<year>2018</year>) <volume>83</volume>:<fpage>103</fpage>&#x2013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jbi.2018.05.007</pub-id>, PMID: <pub-id pub-id-type="pmid">29852317</pub-id></citation></ref>
<ref id="ref14"><label>14.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>H</given-names></name> <name><surname>Hu</surname> <given-names>B</given-names></name> <name><surname>Liu</surname> <given-names>Z</given-names></name> <name><surname>Wang</surname> <given-names>G</given-names></name> <name><surname>Zhang</surname> <given-names>L</given-names></name> <name><surname>Li</surname> <given-names>X</given-names></name> <etal/></person-group>. <article-title>Detecting depression using an ensemble logistic regression model based on multiple speech features</article-title>. <source>Comput Math Methods Med</source>. (<year>2018</year>) <volume>2018</volume>:<fpage>6508319</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2018/6508319</pub-id>, PMID: <pub-id pub-id-type="pmid">30344616</pub-id></citation></ref>
<ref id="ref15"><label>15.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pan</surname> <given-names>W</given-names></name> <name><surname>Flint</surname> <given-names>J</given-names></name> <name><surname>Shenhav</surname> <given-names>L</given-names></name> <name><surname>Liu</surname> <given-names>T</given-names></name> <name><surname>Liu</surname> <given-names>M</given-names></name> <name><surname>Hu</surname> <given-names>B</given-names></name> <etal/></person-group>. <article-title>Re-examining the robustness of voice features in predicting depression: compared with baseline of confounders</article-title>. <source>PLoS One</source>. (<year>2019</year>) <volume>14</volume>:<fpage>e0218172</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0218172</pub-id>, PMID: <pub-id pub-id-type="pmid">31220113</pub-id></citation></ref>
<ref id="ref16"><label>16.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rohani</surname> <given-names>DA</given-names></name> <name><surname>Faurholt-Jepsen</surname> <given-names>M</given-names></name> <name><surname>Kessing</surname> <given-names>LV</given-names></name> <name><surname>Bardram</surname> <given-names>JE</given-names></name></person-group>. <article-title>Correlations between objective behavioral features collected from mobile and wearable devices and depressive mood symptoms in patients with affective disorders: systematic review</article-title>. <source>JMIR Mhealth Uhealth</source>. (<year>2018</year>) <volume>6</volume>:<fpage>9691</fpage>. doi: <pub-id pub-id-type="doi">10.2196/mhealth.9691</pub-id>, PMID: <pub-id pub-id-type="pmid">30104184</pub-id></citation></ref>
<ref id="ref17"><label>17.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Taguchi</surname> <given-names>T</given-names></name> <name><surname>Tachikawa</surname> <given-names>H</given-names></name> <name><surname>Nemoto</surname> <given-names>K</given-names></name> <name><surname>Suzuki</surname> <given-names>M</given-names></name> <name><surname>Nagano</surname> <given-names>T</given-names></name> <name><surname>Tachibana</surname> <given-names>R</given-names></name> <etal/></person-group>. <article-title>Major depressive disorder discrimination using vocal acoustic features</article-title>. <source>J Affect Disord</source>. (<year>2018</year>) <volume>225</volume>:<fpage>214</fpage>&#x2013;<lpage>20</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jad.2017.08.038</pub-id>, PMID: <pub-id pub-id-type="pmid">28841483</pub-id></citation></ref>
<ref id="ref18"><label>18.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>YU</given-names></name> <name><surname>Wang</surname> <given-names>H</given-names></name> <name><surname>Liu</surname> <given-names>Z</given-names></name> <name><surname>Yu</surname> <given-names>X</given-names></name> <name><surname>Yan</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Prevalence of mental disorders in China: a cross-sectional epidemiological study</article-title>. <source>Lancet Psychiatry</source>. (<year>2019</year>) <volume>6</volume>:<fpage>211</fpage>&#x2013;<lpage>24</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S2215-0366(18)30511-X</pub-id>, PMID: <pub-id pub-id-type="pmid">30792114</pub-id></citation></ref>
<ref id="ref19"><label>19.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>El Ayadi</surname> <given-names>M</given-names></name> <name><surname>Kamel</surname> <given-names>MS</given-names></name> <name><surname>Karray</surname> <given-names>F</given-names></name></person-group>. <article-title>Survey on speech emotion recognition: features, classification schemes, and databases</article-title>. <source>Pattern Recogn</source>. (<year>2011</year>) <volume>44</volume>:<fpage>572</fpage>&#x2013;<lpage>87</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.patcog.2010.09.020</pub-id></citation></ref>
<ref id="ref20"><label>20.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Swain</surname> <given-names>M</given-names></name> <name><surname>Routray</surname> <given-names>A</given-names></name> <name><surname>Kabisatpathy</surname> <given-names>P</given-names></name></person-group>. <article-title>Databases, features and classifiers for speech emotion recognition: a review</article-title>. <source>Int J Speech Technol</source>. (<year>2018</year>) <volume>21</volume>:<fpage>93</fpage>&#x2013;<lpage>120</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10772-018-9491-z</pub-id>, PMID: <pub-id pub-id-type="pmid">28356908</pub-id></citation></ref>
<ref id="ref21"><label>21.</label><citation citation-type="book"><person-group person-group-type="author"><collab id="coll1">American Psychiatric Association</collab></person-group>. <source>Diagnostic and statistical manual of mental disorders (DSM-IV)</source> (<year>1994</year>). <publisher-loc>Washington, D.C</publisher-loc>: <publisher-name>American Psychiatric Pub</publisher-name></citation></ref>
<ref id="ref22"><label>22.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Espinola</surname> <given-names>CW</given-names></name> <name><surname>Gomes</surname> <given-names>JC</given-names></name> <name><surname>Pereira</surname> <given-names>JMS</given-names></name> <name><surname>dos Santos</surname> <given-names>WP</given-names></name></person-group>. <article-title>Vocal acoustic analysis and machine learning for the identification of schizophrenia</article-title>. <source>Res Biomed Eng</source>. (<year>2021</year>) <volume>37</volume>:<fpage>33</fpage>&#x2013;<lpage>46</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s42600-020-00097-1</pub-id></citation></ref>
<ref id="ref23"><label>23.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weiner</surname> <given-names>L</given-names></name> <name><surname>Guidi</surname> <given-names>A</given-names></name> <name><surname>Doignon-Camus</surname> <given-names>N</given-names></name> <name><surname>Giersch</surname> <given-names>A</given-names></name> <name><surname>Bertschy</surname> <given-names>G</given-names></name> <name><surname>Vanello</surname> <given-names>N</given-names></name></person-group>. <article-title>Vocal features obtained through automated methods in verbal fluency tasks can aid the identification of mixed episodes in bipolar disorder</article-title>. <source>Transl Psychiatry</source>. (<year>2021</year>) <volume>11</volume>:<fpage>415</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41398-021-01535-z</pub-id>, PMID: <pub-id pub-id-type="pmid">34341338</pub-id></citation></ref>
<ref id="ref24"><label>24.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Higuchi</surname> <given-names>M</given-names></name> <name><surname>Tokuno</surname> <given-names>SHINICHI</given-names></name> <name><surname>Nakamura</surname> <given-names>M</given-names></name> <name><surname>Shinohara</surname> <given-names>SHUJI</given-names></name> <name><surname>Mitsuyoshi</surname> <given-names>S</given-names></name> <name><surname>Omiya</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>Classification of bipolar disorder, major depressive disorder, and healthy state using voice</article-title>. <source>Asian J Pharm Clin Res</source>. (<year>2018</year>) <volume>11</volume>:<fpage>89</fpage>&#x2013;<lpage>93</lpage>. doi: <pub-id pub-id-type="doi">10.22159/ajpcr.2018.v11s3.30042</pub-id></citation></ref>
<ref id="ref25"><label>25.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wanderley Espinola</surname> <given-names>C</given-names></name> <name><surname>Gomes</surname> <given-names>JC</given-names></name> <name><surname>M&#x00F4;nica Silva Pereira</surname> <given-names>J</given-names></name> <name><surname>dos Santos</surname> <given-names>WP</given-names></name></person-group>. <article-title>Detection of major depressive disorder, bipolar disorder, schizophrenia and generalized anxiety disorder using vocal acoustic analysis and machine learning: an exploratory study</article-title>. <source>Res Biomed Eng</source>. (<year>2022</year>) <volume>38</volume>:<fpage>813</fpage>&#x2013;<lpage>29</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s42600-022-00222-2</pub-id></citation></ref>
<ref id="ref26"><label>26.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>Y</given-names></name> <name><surname>Kim</surname> <given-names>YC</given-names></name> <name><surname>Proctor</surname> <given-names>MI</given-names></name> <name><surname>Narayanan</surname> <given-names>SS</given-names></name> <name><surname>Nayak</surname> <given-names>KS</given-names></name></person-group>. <article-title>Dynamic 3D visualization of vocal tract shaping during speech</article-title>. <source>IEEE Trans Med Imaging</source>. (<year>2013</year>) <volume>32</volume>:<fpage>838</fpage>&#x2013;<lpage>48</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TMI.2012.2230017</pub-id>, PMID: <pub-id pub-id-type="pmid">23204279</pub-id></citation></ref>
<ref id="ref27"><label>27.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tanha</surname> <given-names>J</given-names></name> <name><surname>Abdi</surname> <given-names>Y</given-names></name> <name><surname>Samadi</surname> <given-names>N</given-names></name> <name><surname>Razzaghi</surname> <given-names>N</given-names></name> <name><surname>Asadpour</surname> <given-names>M</given-names></name></person-group>. <article-title>Boosting methods for multi-class imbalanced data classification: an experimental review</article-title>. <source>J Big Data</source>. (<year>2020</year>) <volume>7</volume>:<fpage>1</fpage>&#x2013;<lpage>47</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s40537-020-00349-y</pub-id></citation></ref>
<ref id="ref28"><label>28.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Batista</surname> <given-names>GE</given-names></name> <name><surname>Prati</surname> <given-names>RC</given-names></name> <name><surname>Monard</surname> <given-names>MC</given-names></name></person-group>. <article-title>A study of the behavior of several methods for balancing machine learning training data</article-title>. <source>ACM SIGKDD Explor Newsletter</source>. (<year>2004</year>) <volume>6</volume>:<fpage>20</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1145/1007730.1007735</pub-id></citation></ref>
<ref id="ref29"><label>29.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Di</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Li</surname> <given-names>W</given-names></name> <name><surname>Zhu</surname> <given-names>T</given-names></name></person-group>. <article-title>Using i-vectors from voice features to identify major depressive disorder</article-title>. <source>J Affect Disord</source>. (<year>2021</year>) <volume>288</volume>:<fpage>161</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jad.2021.04.004</pub-id>, PMID: <pub-id pub-id-type="pmid">33895418</pub-id></citation></ref>
<ref id="ref30"><label>30.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Cummins</surname> <given-names>N.</given-names></name> <name><surname>Epps</surname> <given-names>J.</given-names></name> <name><surname>Breakspear</surname> <given-names>M.</given-names></name> <name><surname>Goecke</surname> <given-names>R.</given-names></name></person-group> (<year>2011</year>). <article-title>An investigation of depressed speech detection: features and normalization</article-title>. <conf-name>In 12th Annual Conference of the International Speech Communication Association</conf-name></citation></ref>
<ref id="ref31"><label>31.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Cummins</surname> <given-names>N.</given-names></name> <name><surname>Epps</surname> <given-names>J.</given-names></name> <name><surname>Sethu</surname> <given-names>V.</given-names></name> <name><surname>Krajewski</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>Variability compensation in small data: oversampled extraction of i-vectors for the classification of depressed speech</article-title>. <conf-name>In 2014 IEEE international conference on acoustics, speech and signal processing (ICASSP)</conf-name> (<fpage>970</fpage>&#x2013;<lpage>974</lpage>). <publisher-name>IEEE</publisher-name>.</citation></ref>
<ref id="ref32"><label>32.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Joshi</surname> <given-names>J</given-names></name> <name><surname>Goecke</surname> <given-names>R</given-names></name> <name><surname>Alghowinem</surname> <given-names>S</given-names></name> <name><surname>Dhall</surname> <given-names>A</given-names></name> <name><surname>Wagner</surname> <given-names>M</given-names></name> <name><surname>Epps</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Multimodal assistive technologies for depression diagnosis and monitoring</article-title>. <source>J Multimodal User Interf</source>. (<year>2013</year>) <volume>7</volume>:<fpage>217</fpage>&#x2013;<lpage>28</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s12193-013-0123-2</pub-id></citation></ref>
<ref id="ref33"><label>33.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nasir</surname> <given-names>M</given-names></name> <name><surname>Jati</surname> <given-names>A</given-names></name> <name><surname>Shivakumar</surname> <given-names>PG</given-names></name> <name><surname>Chakravarthula</surname> <given-names>SN</given-names></name> <name><surname>Georgiou</surname> <given-names>P</given-names></name></person-group>. <article-title>Multimodal and multiresolution depression detection from speech and facial landmark features. AVEC 2016 &#x2013; proceedings of the 6th international workshop on audio/visual emotion challenge</article-title>. <source>Co-Located ACM Multimedia</source>. (<year>2016</year>) <volume>2016</volume>:<fpage>43</fpage>&#x2013;<lpage>50</lpage>. doi: <pub-id pub-id-type="doi">10.1145/2988257.2988261</pub-id></citation></ref>
<ref id="ref34"><label>34.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Hashim</surname> <given-names>N.</given-names></name> <name><surname>Wahidah</surname> <given-names>N. N.</given-names></name></person-group> (<year>2017</year>). <source>Analysis of power spectrum density of male speech as indicators for high risk and depressed decision (doctoral dissertation)</source>. <publisher-name>Vanderbilt University</publisher-name>.</citation></ref>
<ref id="ref35"><label>35.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Hu</surname> <given-names>B.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>F.</given-names></name> <name><surname>Wang</surname> <given-names>G.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>Detecting depression in speech under different speaking styles and emotional valences</article-title>. <source>International conference on brain informatics</source> (<fpage>261</fpage>&#x2013;<lpage>271</lpage>). <publisher-name>Springer</publisher-name>, <publisher-loc>Cham</publisher-loc>.</citation></ref>
<ref id="ref36"><label>36.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Simantiraki</surname> <given-names>O.</given-names></name> <name><surname>Charonyktakis</surname> <given-names>P.</given-names></name> <name><surname>Pampouchidou</surname> <given-names>A.</given-names></name> <name><surname>Tsiknakis</surname> <given-names>M.</given-names></name> <name><surname>Cooke</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Glottal source features for automatic speech-based depression assessment</article-title>. <source>INTERSPEECH</source> (<fpage>2700</fpage>&#x2013;<lpage>2704</lpage>). <publisher-loc>Stockholm, Sweden</publisher-loc>.</citation></ref>
<ref id="ref37"><label>37.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pan</surname> <given-names>W</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Liu</surname> <given-names>T</given-names></name> <name><surname>Liu</surname> <given-names>X</given-names></name> <name><surname>Liu</surname> <given-names>M</given-names></name> <name><surname>Hu</surname> <given-names>B</given-names></name> <etal/></person-group>. <article-title>Depression recognition based on speech analysis</article-title>. <source>Chin Sci Bull</source>. (<year>2018</year>) <volume>63</volume>:<fpage>2081</fpage>&#x2013;<lpage>92</lpage>. doi: <pub-id pub-id-type="doi">10.1360/N972017-01250</pub-id></citation></ref>
<ref id="ref38"><label>38.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>L</given-names></name> <name><surname>Liu</surname> <given-names>T</given-names></name> <name><surname>Pan</surname> <given-names>W</given-names></name> <name><surname>Hu</surname> <given-names>B</given-names></name> <name><surname>Zhu</surname> <given-names>T</given-names></name></person-group>. <article-title>Acoustic differences between healthy and depressed people: a cross-situation study</article-title>. <source>BMC Psychiatry</source>. (<year>2019</year>) <volume>19</volume>:<fpage>300</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12888-019-2300-7</pub-id></citation></ref>
<ref id="ref39"><label>39.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Good</surname> <given-names>PI</given-names></name></person-group>. <article-title>Permutation, parametric and bootstrap tests of hypotheses: a practical guide to resampling methods for testing hypotheses</article-title> (<year>2005</year>) <volume>100</volume></citation></ref>
<ref id="ref40"><label>40.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cenzer</surname> <given-names>I</given-names></name> <name><surname>Boscardin</surname> <given-names>WJ</given-names></name> <name><surname>Berger</surname> <given-names>K</given-names></name></person-group>. <article-title>Performance of matching methods in studies of rare diseases: a simulation study</article-title>. <source>Intractable &#x0026; Rare Diseases Research</source>. (<year>2020</year>) <volume>9</volume>:<fpage>79</fpage>&#x2013;<lpage>88</lpage>. doi: <pub-id pub-id-type="doi">10.5582/irdr.2020.01016</pub-id>, PMID: <pub-id pub-id-type="pmid">32494554</pub-id></citation></ref>
<ref id="ref41"><label>41.</label><citation citation-type="book"><person-group person-group-type="author"><collab id="coll2">RStudio Team</collab></person-group>. <source>RStudio: Integrated development for R</source>. <publisher-loc>Boston, MA</publisher-loc>: <publisher-name>RStudio, PBC</publisher-name> (<year>2020</year>) <comment>Available at:</comment> <ext-link xlink:href="https://www.rstudio.com" ext-link-type="uri">https://www.rstudio.com</ext-link>.</citation></ref>
<ref id="ref42"><label>42.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Matlab</surname> <given-names>S</given-names></name></person-group>. <source>Matlab</source>. <publisher-loc>Natick, MA</publisher-loc>: <publisher-name>The MathWorks</publisher-name> (<year>2012</year>).</citation></ref>
<ref id="ref43"><label>43.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dehak</surname> <given-names>N</given-names></name> <name><surname>Kenny</surname> <given-names>PJ</given-names></name> <name><surname>Dehak</surname> <given-names>R</given-names></name> <name><surname>Dumouchel</surname> <given-names>P</given-names></name> <name><surname>Ouellet</surname> <given-names>P</given-names></name></person-group>. <article-title>Front-end factor analysis for speaker verification</article-title>. <source>IEEE Trans Audio Speech Lang Process</source>. (<year>2011</year>) <volume>19</volume>:<fpage>788</fpage>&#x2013;<lpage>98</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TASL.2010.2064307</pub-id></citation></ref>
<ref id="ref44"><label>44.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Povey</surname> <given-names>D.</given-names></name> <name><surname>Ghoshal</surname> <given-names>A.</given-names></name> <name><surname>Boulianne</surname> <given-names>G.</given-names></name> <name><surname>Burget</surname> <given-names>L.</given-names></name> <name><surname>Glembek</surname> <given-names>O.</given-names></name> <name><surname>Goel</surname> <given-names>N.</given-names></name> <etal/></person-group>, (<year>2011</year>). <article-title>The Kaldi speech recognition toolkit</article-title>. <conf-name>IEEE 2011 workshop on automatic speech recognition and understanding (no. CONF)</conf-name>. <publisher-name>IEEE Signal Processing Society</publisher-name>.</citation></ref>
<ref id="ref45"><label>45.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Friedman</surname> <given-names>J</given-names></name> <name><surname>Hastie</surname> <given-names>T</given-names></name> <name><surname>Tibshirani</surname> <given-names>R</given-names></name></person-group>. <article-title>Regularization paths for generalized linear models via coordinate descent</article-title>. <source>J Stat Softw</source>. (<year>2010</year>) <volume>33</volume>:<fpage>1</fpage>&#x2013;<lpage>22</lpage>. PMID: <pub-id pub-id-type="pmid">20808728</pub-id></citation></ref>
<ref id="ref46"><label>46.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dehejia</surname> <given-names>RH</given-names></name> <name><surname>Wahba</surname> <given-names>S</given-names></name></person-group>. <article-title>Propensity score-matching methods for nonexperimental causal studies</article-title>. <source>Rev Econ Stat</source>. (<year>2002</year>) <volume>84</volume>:<fpage>151</fpage>&#x2013;<lpage>61</lpage>. doi: <pub-id pub-id-type="doi">10.1162/003465302317331982</pub-id></citation></ref>
<ref id="ref47"><label>47.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Alghowinem</surname> <given-names>S.</given-names></name> <name><surname>Goecke</surname> <given-names>R.</given-names></name> <name><surname>Wagner</surname> <given-names>M.</given-names></name> <name><surname>Epps</surname> <given-names>J.</given-names></name> <name><surname>Breakspear</surname> <given-names>M.</given-names></name> <name><surname>Parker</surname> <given-names>G.</given-names></name></person-group> (<year>2013</year>). <article-title>Detecting depression: a comparison between spontaneous and read speech</article-title>. <conf-name>ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing &#x2013; Proceedings</conf-name>, <fpage>7547</fpage>&#x2013;<lpage>7551</lpage></citation></ref>
<ref id="ref48"><label>48.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Horwitz</surname> <given-names>R.</given-names></name> <name><surname>Quatieri</surname> <given-names>T. F.</given-names></name> <name><surname>Helfer</surname> <given-names>B. S.</given-names></name> <name><surname>Yu</surname> <given-names>B.</given-names></name> <name><surname>Williamson</surname> <given-names>J. R.</given-names></name> <name><surname>Mundt</surname> <given-names>J.</given-names></name></person-group> (<year>2013</year>). <article-title>On the relative importance of vocal source, system, and prosody in human depression</article-title>. <conf-name>2013 IEEE international conference on body sensor networks, BSN 2013</conf-name></citation></ref>
<ref id="ref49"><label>49.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Quatieri</surname> <given-names>T. F.</given-names></name> <name><surname>Malyska</surname> <given-names>N.</given-names></name></person-group> (<year>2012</year>). <article-title>Vocal-source biomarkers for depression: A link to psychomotor activity</article-title>. <conf-name>In 13th Annual Conference of the International Speech Communication Association</conf-name>.</citation></ref>
<ref id="ref50"><label>50.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sidorov</surname> <given-names>M</given-names></name> <name><surname>Minker</surname> <given-names>W</given-names></name></person-group>. <article-title>Emotion recognition and depression diagnosis by acoustic and visual features: a multimodal approach. AVEC 2014 &#x2013; proceedings of the 4th international workshop on audio/visual emotion challenge</article-title>. <source>Workshop MM</source>. (<year>2014</year>) <volume>2014</volume>:<fpage>81</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1145/2661806.2661816</pub-id></citation></ref>
<ref id="ref51"><label>51.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Faurholt-Jepsen</surname> <given-names>M</given-names></name> <name><surname>Rohani</surname> <given-names>DA</given-names></name> <name><surname>Busk</surname> <given-names>J</given-names></name> <name><surname>Vinberg</surname> <given-names>M</given-names></name> <name><surname>Bardram</surname> <given-names>JE</given-names></name> <name><surname>Kessing</surname> <given-names>LV</given-names></name></person-group>. <article-title>Voice analyses using smartphone-based data in patients with bipolar disorder, unaffected relatives and healthy control individuals, and during different affective states</article-title>. <source>Int J Bipolar Disorders</source>. (<year>2021</year>) <volume>9</volume>:<fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi: <pub-id pub-id-type="doi">10.1186/s40345-021-00243-3</pub-id></citation></ref>
<ref id="ref52"><label>52.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tahir</surname> <given-names>Y</given-names></name> <name><surname>Yang</surname> <given-names>Z</given-names></name> <name><surname>Chakraborty</surname> <given-names>D</given-names></name> <name><surname>Thalmann</surname> <given-names>N</given-names></name> <name><surname>Thalmann</surname> <given-names>D</given-names></name> <name><surname>Maniam</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>Non-verbal speech cues as objective measures for negative symptoms in patients with schizophrenia</article-title>. <source>PLoS One</source>. (<year>2019</year>) <volume>14</volume>:<fpage>e0214314</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0214314</pub-id>, PMID: <pub-id pub-id-type="pmid">30964869</pub-id></citation></ref>
<ref id="ref53"><label>53.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>France</surname> <given-names>DJ</given-names></name> <name><surname>Shiavi</surname> <given-names>RG</given-names></name></person-group>. <article-title>Acoustical properties of speech as indicators of depression and suicidal risk</article-title>. <source>IEEE Trans Biomed Eng</source>. (<year>2000</year>) <volume>47</volume>:<fpage>829</fpage>&#x2013;<lpage>37</lpage>. doi: <pub-id pub-id-type="doi">10.1109/10.846676</pub-id>, PMID: <pub-id pub-id-type="pmid">10916253</pub-id></citation></ref>
<ref id="ref54"><label>54.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Low</surname> <given-names>L. S. A.</given-names></name> <name><surname>Maddage</surname> <given-names>N. C.</given-names></name> <name><surname>Lech</surname> <given-names>M.</given-names></name> <name><surname>Sheeber</surname> <given-names>L.</given-names></name> <name><surname>Allen</surname> <given-names>N.</given-names></name></person-group> (<year>2010</year>). <article-title>Influence of acoustic low-level descriptors in the detection of clinical depression in adolescents</article-title>. <conf-name>ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings</conf-name>, <fpage>5154</fpage>&#x2013;<lpage>5157</lpage></citation></ref>
<ref id="ref55"><label>55.</label><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Moore</surname> <given-names>E.</given-names></name> <name><surname>Clements</surname> <given-names>M.</given-names></name> <name><surname>Peifer</surname> <given-names>J.</given-names></name> <name><surname>Weisser</surname> <given-names>L.</given-names></name></person-group> (<year>2003</year>). <article-title>Analysis of prosodic variation in speech for clinical depression</article-title>. <conf-name>Annual International Conference of the IEEE Engineering in Medicine and Biology &#x2013; Proceedings</conf-name>, <volume>3</volume>, <fpage>2925</fpage>&#x2013;<lpage>2928</lpage></citation></ref>
<ref id="ref56"><label>56.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mundt</surname> <given-names>JC</given-names></name> <name><surname>Snyder</surname> <given-names>PJ</given-names></name> <name><surname>Cannizzaro</surname> <given-names>MS</given-names></name> <name><surname>Chappie</surname> <given-names>K</given-names></name> <name><surname>Geralts</surname> <given-names>DS</given-names></name></person-group>. <article-title>Voice acoustic measures of depression severity and treatment response collected via interactive voice response (IVR) technology</article-title>. <source>J Neurolinguistics</source>. (<year>2007</year>) <volume>20</volume>:<fpage>50</fpage>&#x2013;<lpage>64</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jneuroling.2006.04.001</pub-id>, PMID: <pub-id pub-id-type="pmid">21253440</pub-id></citation></ref>
<ref id="ref57"><label>57.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ozdas</surname> <given-names>A</given-names></name> <name><surname>Shiavi</surname> <given-names>RG</given-names></name> <name><surname>Silverman</surname> <given-names>SE</given-names></name> <name><surname>Silverman</surname> <given-names>MK</given-names></name> <name><surname>Wilkes</surname> <given-names>DM</given-names></name></person-group>. <article-title>Investigation of vocal jitter and glottal flow spectrum as possible cues for depression and near-term suicidal risk</article-title>. <source>IEEE Trans Biomed Eng</source>. (<year>2004</year>) <volume>51</volume>:<fpage>1530</fpage>&#x2013;<lpage>40</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TBME.2004.827544</pub-id>, PMID: <pub-id pub-id-type="pmid">15376501</pub-id></citation></ref>
<ref id="ref58"><label>58.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Trevino</surname> <given-names>AC</given-names></name> <name><surname>Quatieri</surname> <given-names>TF</given-names></name> <name><surname>Malyska</surname> <given-names>N</given-names></name></person-group>. <article-title>Phonologically-based biomarkers for major depressive disorder</article-title>. <source>EURASIP J Adv Signal Process</source>. (<year>2011</year>) <volume>2011</volume>:<fpage>42</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1687-6180-2011-42</pub-id></citation></ref>
<ref id="ref59"><label>59.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alpert</surname> <given-names>M</given-names></name> <name><surname>Rosenberg</surname> <given-names>SD</given-names></name> <name><surname>Pouget</surname> <given-names>ER</given-names></name> <name><surname>Shaw</surname> <given-names>RJ</given-names></name></person-group>. <article-title>Prosody and lexical accuracy in flat affect schizophrenia</article-title>. <source>Psychiatry Res</source>. (<year>2000</year>) <volume>97</volume>:<fpage>107</fpage>&#x2013;<lpage>18</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0165-1781(00)00231-6</pub-id></citation></ref>
<ref id="ref60"><label>60.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cohen</surname> <given-names>AS</given-names></name> <name><surname>Mitchell</surname> <given-names>KR</given-names></name> <name><surname>Docherty</surname> <given-names>NM</given-names></name> <name><surname>Horan</surname> <given-names>WP</given-names></name></person-group>. <article-title>Vocal expression in schizophrenia: less than meets the ear</article-title>. <source>J Abnorm Psychol</source>. (<year>2016</year>) <volume>125</volume>:<fpage>299</fpage>&#x2013;<lpage>309</lpage>. doi: <pub-id pub-id-type="doi">10.1037/abn0000136</pub-id>, PMID: <pub-id pub-id-type="pmid">26854511</pub-id></citation></ref>
<ref id="ref61"><label>61.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cohen</surname> <given-names>AS</given-names></name> <name><surname>Najolia</surname> <given-names>GM</given-names></name> <name><surname>Kim</surname> <given-names>Y</given-names></name> <name><surname>Dinzeo</surname> <given-names>TJ</given-names></name></person-group>. <article-title>On the boundaries of blunt affect/alogia across severe mental illness: implications for research domain criteria</article-title>. <source>Schizophr Res</source>. (<year>2012</year>) <volume>140</volume>:<fpage>41</fpage>&#x2013;<lpage>5</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.schres.2012.07.001</pub-id>, PMID: <pub-id pub-id-type="pmid">22831770</pub-id></citation></ref>
<ref id="ref62"><label>62.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Galynker</surname> <given-names>II</given-names></name> <name><surname>Cohen</surname> <given-names>LJ</given-names></name> <name><surname>Cai</surname> <given-names>J</given-names></name></person-group>. <article-title>Negative symptoms in patients with major depressive disorder: a preliminary report</article-title>. <source>Neuropsychiatry Neuropsychol Behav Neurol</source>. (<year>2000</year>) <volume>13</volume>:<fpage>171</fpage>&#x2013;<lpage>6</lpage>. PMID: <pub-id pub-id-type="pmid">10910087</pub-id> Available at: <ext-link xlink:href="https://pubmed.ncbi.nlm.nih.gov/10910087/" ext-link-type="uri">https://pubmed.ncbi.nlm.nih.gov/10910087/</ext-link></citation></ref>
<ref id="ref63"><label>63.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoekert</surname> <given-names>M</given-names></name> <name><surname>Kahn</surname> <given-names>RS</given-names></name> <name><surname>Pijnenborg</surname> <given-names>M</given-names></name> <name><surname>Aleman</surname> <given-names>A</given-names></name></person-group>. <article-title>Impaired recognition and expression of emotional prosody in schizophrenia: review and meta-analysis</article-title>. <source>Schizophr Res</source>. (<year>2007</year>) <volume>96</volume>:<fpage>135</fpage>&#x2013;<lpage>45</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.schres.2007.07.023</pub-id>, PMID: <pub-id pub-id-type="pmid">17766089</pub-id></citation></ref>
<ref id="ref64"><label>64.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Millan</surname> <given-names>MJ</given-names></name> <name><surname>Fone</surname> <given-names>K</given-names></name> <name><surname>Steckler</surname> <given-names>T</given-names></name> <name><surname>Horan</surname> <given-names>WP</given-names></name></person-group>. <article-title>Negative symptoms of schizophrenia: clinical characteristics, pathophysiological substrates, experimental models and prospects for improved treatment</article-title>. <source>Eur Neuropsychopharmacol</source>. (<year>2014</year>) <volume>24</volume>:<fpage>645</fpage>&#x2013;<lpage>92</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.euroneuro.2014.03.008</pub-id>, PMID: <pub-id pub-id-type="pmid">24820238</pub-id></citation></ref>
<ref id="ref65"><label>65.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tr&#x00E9;meau</surname> <given-names>F</given-names></name> <name><surname>Malaspina</surname> <given-names>D</given-names></name> <name><surname>Duval</surname> <given-names>F</given-names></name> <name><surname>Corr&#x00EA;a</surname> <given-names>H</given-names></name> <name><surname>Hager-Budny</surname> <given-names>M</given-names></name> <name><surname>Coin-Bariou</surname> <given-names>L</given-names></name> <etal/></person-group>. <article-title>Facial expressiveness in patients with schizophrenia compared to depressed patients and nonpatient comparison subjects</article-title>. <source>Am J Psychiatr</source>. (<year>2005</year>) <volume>162</volume>:<fpage>92</fpage>&#x2013;<lpage>101</lpage>. doi: <pub-id pub-id-type="doi">10.1176/appi.ajp.162.1.92</pub-id>, PMID: <pub-id pub-id-type="pmid">15625206</pub-id></citation></ref>
<ref id="ref66"><label>66.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grande</surname> <given-names>I</given-names></name> <name><surname>Berk</surname> <given-names>M</given-names></name> <name><surname>Birmaher</surname> <given-names>B</given-names></name> <name><surname>Vieta</surname> <given-names>E</given-names></name></person-group>. <article-title>Bipolar disorder</article-title>. <source>Lancet</source>. (<year>2016</year>) <volume>387</volume>:<fpage>1561</fpage>&#x2013;<lpage>72</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0140-6736(15)00241-X</pub-id>, PMID: <pub-id pub-id-type="pmid">26388529</pub-id></citation></ref>
<ref id="ref67"><label>67.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Judd</surname> <given-names>LL</given-names></name> <name><surname>Schettler</surname> <given-names>PJ</given-names></name> <name><surname>Akiskal</surname> <given-names>HS</given-names></name> <name><surname>Coryell</surname> <given-names>W</given-names></name> <name><surname>Leon</surname> <given-names>AC</given-names></name> <name><surname>Maser</surname> <given-names>JD</given-names></name> <etal/></person-group>. <article-title>Residual symptom recovery from major affective episodes in bipolar disorders and rapid episode relapse/recurrence</article-title>. <source>Arch Gen Psychiatry</source>. (<year>2008</year>) <volume>65</volume>:<fpage>386</fpage>&#x2013;<lpage>94</lpage>. doi: <pub-id pub-id-type="doi">10.1001/archpsyc.65.4.386</pub-id>, PMID: <pub-id pub-id-type="pmid">18391127</pub-id></citation></ref>
</ref-list>
</back>
</article>
