<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Ment Health</journal-id><journal-id journal-id-type="publisher-id">mental</journal-id><journal-title>JMIR Mental Health</journal-title><abbrev-journal-title>JMIR Ment Health</abbrev-journal-title><issn pub-type="epub">2368-7959</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e64578</article-id><article-id pub-id-type="doi">10.2196/64578</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Momentary Depression Severity Prediction in Patients With Acute Depression Who Undergo Sleep Deprivation Therapy: Speech-Based Machine Learning Approach</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Hartnagel</surname><given-names>Lisa-Marie</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Emden</surname><given-names>Daniel</given-names></name><degrees>Dipl-Inf</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Foo</surname><given-names>Jerome C</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Streit</surname><given-names>Fabian</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff7">7</xref><xref ref-type="aff" rid="aff8">8</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Witt</surname><given-names>Stephanie H</given-names></name><degrees>PD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Frank</surname><given-names>Josef</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Limberger</surname><given-names>Matthias F</given-names></name><degrees>MA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Schmitz</surname><given-names>Sara E</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Gilles</surname><given-names>Maria</given-names></name><degrees>Dr med</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Rietschel</surname><given-names>Marcella</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hahn</surname><given-names>Tim</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ebner-Priemer</surname><given-names>Ulrich W</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Sirignano</surname><given-names>Lea</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Mental mHealth Lab, Institute of Sports and Sports Science, Karlsruhe Institute of Technology</institution><addr-line>Hertzstr. 16, Building 06.31</addr-line><addr-line>Karlsruhe</addr-line><country>Germany</country></aff><aff id="aff2"><institution>Medical Machine Learning Lab, Institute for Translational Psychiatry, University of M&#x00FC;nster</institution><addr-line>M&#x00FC;nster</addr-line><country>Germany</country></aff><aff id="aff3"><institution>Department of Genetic Epidemiology in Psychiatry, Central Institute of Mental Health, Medical Faculty Mannheim / Heidelberg University</institution><addr-line>Mannheim</addr-line><country>Germany</country></aff><aff id="aff4"><institution>Institute for Psychopharmacology, Central Institute of Mental Health, Medical Faculty Mannheim / Heidelberg University</institution><addr-line>Mannheim</addr-line><country>Germany</country></aff><aff id="aff5"><institution>Neuroscience and Mental Health Institute, University of Alberta</institution><addr-line>Edmonton</addr-line><addr-line>AB</addr-line><country>Canada</country></aff><aff id="aff6"><institution>Department of Psychiatry, College of Health Sciences, University of Alberta</institution><addr-line>Edmonton</addr-line><addr-line>AB</addr-line><country>Canada</country></aff><aff id="aff7"><institution>Department of Psychiatry and Psychotherapy, Central Institute of Mental Health, Medical Faculty Mannheim / Heidelberg University</institution><addr-line>Mannheim</addr-line><country>Germany</country></aff><aff id="aff8"><institution>Hector Institute for Artificial Intelligence in Psychiatry, Central Institute of Mental Health, Medical Faculty Mannheim / Heidelberg University</institution><addr-line>Mannheim</addr-line><country>Germany</country></aff><contrib-group><contrib 
contrib-type="editor"><name name-style="western"><surname>Torous</surname><given-names>John</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Malhotra</surname><given-names>Meetu</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Pan</surname><given-names>Yue</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Lisa-Marie Hartnagel, MSc, Mental mHealth Lab, Institute of Sports and Sports Science, Karlsruhe Institute of Technology, Hertzstr. 16, Building 06.31, Karlsruhe, 76187, Germany, 49 721 608 47543; <email>lisa.hartnagel@kit.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date pub-type="epub"><day>23</day><month>12</month><year>2024</year></pub-date><volume>11</volume><elocation-id>e64578</elocation-id><history><date date-type="received"><day>20</day><month>07</month><year>2024</year></date><date date-type="rev-recd"><day>02</day><month>10</month><year>2024</year></date><date date-type="accepted"><day>04</day><month>10</month><year>2024</year></date></history><copyright-statement>&#x00A9; Lisa-Marie Hartnagel, Daniel Emden, Jerome C Foo, Fabian Streit, Stephanie H Witt, Josef Frank, Matthias F Limberger, Sara E Schmitz, Maria Gilles, Marcella Rietschel, Tim Hahn, Ulrich W Ebner-Priemer, Lea Sirignano. Originally published in JMIR Mental Health (<ext-link ext-link-type="uri" xlink:href="https://mental.jmir.org">https://mental.jmir.org</ext-link>), 23.12.2024. 
</copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Mental Health, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://mental.jmir.org/">https://mental.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://mental.jmir.org/2024/1/e64578"/><abstract><sec><title>Background</title><p>Mobile devices for remote monitoring are inevitable tools to support treatment and patient care, especially in recurrent diseases such as major depressive disorder. The aim of this study was to learn if machine learning (ML) models based on longitudinal speech data are helpful in predicting momentary depression severity. Data analyses were based on a dataset including 30 inpatients during an acute depressive episode receiving sleep deprivation therapy in stationary care, an intervention inducing a rapid change in depressive symptoms in a relatively short period of time. Using an ambulatory assessment approach, we captured speech samples and assessed concomitant depression severity via self-report questionnaire over the course of 3 weeks (before, during, and after therapy). 
We extracted 89 speech features from the speech samples using the Extended Geneva Minimalistic Acoustic Parameter Set from the Open-Source Speech and Music Interpretation by Large-Space Extraction (audEERING) toolkit and the additional parameter speech rate.</p></sec><sec><title>Objective</title><p>We aimed to understand if a multiparameter ML approach would significantly improve the prediction compared to previous statistical analyses, and, in addition, which mechanism for splitting training and test data was most successful, especially focusing on the idea of personalized prediction.</p></sec><sec sec-type="methods"><title>Methods</title><p>To do so, we trained and evaluated a set of &#x003E;500 ML pipelines including random forest, linear regression, support vector regression, and Extreme Gradient Boosting regression models and tested them on 5 different train-test split scenarios: a group 5-fold nested cross-validation at the subject level, a leave-one-subject-out approach, a chronological split, an odd-even split, and a random split.</p></sec><sec sec-type="results"><title>Results</title><p>In the 5-fold cross-validation, the leave-one-subject-out, and the chronological split approaches, none of the models were statistically different from random chance. The other two approaches produced significant results for at least one of the models tested, with similar performance. In total, the superior model was an Extreme Gradient Boosting in the odd-even split approach (<italic>R</italic>&#x00B2;=0.339, mean absolute error=0.38; both <italic>P</italic>&#x003C;.001), indicating that 33.9% of the variance in depression severity could be predicted by the speech features.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Overall, our analyses highlight that ML fails to predict depression scores of unseen patients, but prediction performance increased strongly compared to our previous analyses with multilevel models. 
We conclude that future personalized ML models might improve prediction performance even more, leading to better patient management and care.</p></sec></abstract><kwd-group><kwd>ambulatory assessment</kwd><kwd>depression</kwd><kwd>speech features</kwd><kwd>openSMILE</kwd><kwd>machine learning</kwd><kwd>sleep deprivation therapy</kwd><kwd>remote monitoring</kwd><kwd>depressive disorder</kwd><kwd>mobile phone</kwd><kwd>digital health</kwd><kwd>mobile health</kwd><kwd>mHealth</kwd><kwd>mental health</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Major depressive disorder (MDD) is a major global public health challenge imposing a substantial burden on individuals and society as a whole [<xref ref-type="bibr" rid="ref1">1</xref>]. Due to the recurrent nature of MDD in many patients, relapse prevention is an important treatment goal [<xref ref-type="bibr" rid="ref2">2</xref>]. Longitudinal symptom monitoring is crucial, especially for relapse prevention [<xref ref-type="bibr" rid="ref2">2</xref>], as mood deterioration and prodromal symptoms can be detected in time and additional treatment can be initiated before a severe episode fully develops. However, traditional retrospective symptom questionnaires and classification interviews typically consider the last two weeks of symptoms [<xref ref-type="bibr" rid="ref3">3</xref>], which might not be useful for the rapid detection of impending prodromal symptoms. More specifically, even an unrealistic scenario of conducting classification interviews every 2 weeks might delay the detection of a new episode by weeks [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. 
Accordingly, approaches are needed that operate at a higher frequency, enabling us to detect prodromal symptoms, for example, on a daily basis.</p><p>Leveraging on smartphone-based data collection, promising avenues are being opened to support the traditional monitoring of MDD symptoms [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Offering continuous, unobtrusive, near&#x2013;real-time, active and passive everyday life data collection, the use of ambulatory assessment (AA) increases ecologically valid insights into the lives of people living with mental disorders [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Widespread personal digital devices such as smartphones are used to capture momentary self-reported symptoms and behaviors as patients go about their normal daily activities in their natural environment [<xref ref-type="bibr" rid="ref10">10</xref>]. As clear biomarkers for MDD are lacking [<xref ref-type="bibr" rid="ref11">11</xref>], the identification of behavioral markers that can be objectively derived from digitally captured everyday life behavior has great potential to increase automated detection of new episodes, ultimately improving depression care [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>Speech has been discussed as one such potential behavioral marker [<xref ref-type="bibr" rid="ref14">14</xref>]. As early as 1921, Kraepelin [<xref ref-type="bibr" rid="ref15">15</xref>] observed that patients with MDD tended to speak with a lower speech rate, more monotonously, and at a lower pitch compared to healthy individuals. Since then, many studies have described further depression-related altered speech characteristics [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. 
However, the research field faces several challenges such as the sheer limitless volume of potential speech features. Inference statistics require a theory-driven selection of parameters, as combining thousands of them increases the &#x03B1; error [<xref ref-type="bibr" rid="ref17">17</xref>]. Machine learning (ML) techniques offer a data-driven alternative, allowing a variety of parameters to be explored without the need for a priori parameter restriction.</p><p>Most studies investigating speech in MDD (independent of using ML or classical inferential statistics) use case-control designs, comparing speech samples (or often a single sample) of patients with MDD to healthy controls [<xref ref-type="bibr" rid="ref14">14</xref>]. While this approach is initially useful, it does not address the prediction of upcoming episodes. To predict new emerging episodes or prodromal symptoms, we need patient data before an episode and during an emerging episode with prodromal symptoms; even better is to collect data during and after an episode. Such data would allow us to train models to discriminate between healthy, prodromal, and disordered states on a within-person level or to relate speech features to dimensional symptomatology. This would approximate the ultimate goal in clinical practice, namely to decide within a given patient that yesterday&#x2019;s speech features were normal, but today&#x2019;s speech features predict an emerging episode. 
Unfortunately, longitudinal studies of patients with MDD including regular speech samples, regular psychopathological ratings as ground truth and sufficient variance in this ground truth, that is, changes in healthy and disordered states, are rare [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>To address this gap, we used a longitudinal dataset in which repeated assessments of depressive momentary states and speech features derived from selfie videos were collected concomitantly by patients with an acute depressive episode [<xref ref-type="bibr" rid="ref18">18</xref>]. While Wadle and colleagues [<xref ref-type="bibr" rid="ref18">18</xref>] used classical statistics (multilevel models) and focused on 3 specific, theory-driven speech features (speech rate, speech pauses, and pitch variability), which did indeed show associations with depression severity, we wanted to improve on several levels. Given the large number of speech features available, the aim of this study was to extend our previous findings by examining a comprehensive set of 89 speech features and by using more complex modeling approaches in terms of ML. We aim to contribute to this field, as we only identified 3 ML studies using longitudinally assessed data in a clinical (as opposed to subclinical) population with multiple data points per patient to predict depression severity based on speech features.</p><p>In one of the studies, speech samples and concomitant mood self-ratings were collected from 30 patients with MDD via AA over the course of 2 weeks [<xref ref-type="bibr" rid="ref19">19</xref>]. 
ML analyses revealed a correlation of &#x03C1;=.61 between the actual and predicted mood scores, and an improvement in prediction when using personalized (&#x03C1;=.79) instead of nonpersonalized models.</p><p>The most promising dataset at present is from the consortium of the Remote Assessment of Disease and Relapse&#x2014;Central Nervous System (RADAR&#x2014;CNS) project, with 2 relevant publications [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. In the study by Cummins et al [<xref ref-type="bibr" rid="ref21">21</xref>], speech data were collected in the form of a scripted task and a free-response task from 461 patients with MDD every 2 weeks for 18 months. A set of 28 speech features was analyzed using linear mixed models. Associations were found between elevated depression symptoms and speech rate, articulation rate, and speech intensity. However, the authors mention in their limitations that the results are based on the cohort level, which limits insights into intraindividual depression-related speech changes, which they plan to investigate in the future. The other publication from the RADAR&#x2014;CNS project focused on the benefits of model personalization [<xref ref-type="bibr" rid="ref20">20</xref>]. Data from the scripted (n=271) and free response (n=258) task from a subset of patients were used to explore personalized and generalized ML models. Three speech parameter sets were extracted from a total of 8004 speech samples, with personalization proving beneficial for their binary depression classification (high or low depression severity). 
Specifically, running a support vector regression (SVR) classifier based on the extended version of the Geneva Minimalistic Acoustic Parameter Set (eGeMAPS, audEERING) from the free-response task for this binary decision resulted in better performance for the personalized compared to the generalized models.</p><p>Building on previous work by the authors [<xref ref-type="bibr" rid="ref18">18</xref>], we aim to contribute to closing this gap and to the understanding of speech-based longitudinal monitoring of MDD. Specifically, we were interested in whether a multiparameter ML approach would significantly improve prediction compared to our previous study, which focused on the three most prominent speech features. In addition, we explored which mechanism for splitting training and test data was most successful, with a particular focus on the idea of personalized prediction. To do so, we analyzed a dataset of patients (n=30) diagnosed with MDD during sleep deprivation therapy, a fast-acting treatment that results in a significant improvement of depressive states in most of the patients within 36 hours [<xref ref-type="bibr" rid="ref22">22</xref>]. The given treatment ensures short-term effects, which is advantageous compared to other studies such as the RADAR&#x2014;CNS project where patients are observed for over 2 years to reveal illness episodes [<xref ref-type="bibr" rid="ref23">23</xref>]. In Wadle et al [<xref ref-type="bibr" rid="ref18">18</xref>], patients reported momentary depressive states and recorded concomitant selfie videos talking about their current feelings 2&#x2010;3 times per day for up to 3 weeks. Speech features were extracted from the speech samples using the software openSMILE (Open-Source Speech and Music Interpretation by Large-Space Extraction) [<xref ref-type="bibr" rid="ref24">24</xref>]. 
To assess the potential clinical utility of automated symptom monitoring using speech features, we trained and evaluated a comprehensive set of &#x003E;500 ML pipelines (by optimizing hyperparameters of random chance and dummy regressors for baseline comparisons, random forest, linear regression, SVR, and XGBoost [Extreme Gradient Boosting] regression models) to predict individual symptom severity. We used five different approaches to evaluate whether these ML models generalize across patients or whether personalized splits are superior: (1) group 5-fold cross-validation at the subject level; (2) a leave-one-subject-out (LOSO) approach; and (3) a train-test-split with 2-fold cross-validation using different splitting techniques: (3a) chronological split with the first half as training and the second half as test set; (3b) odd-even split, with chronologically sorted data put into train and test set by turns; and (3c) a random split, which was repeated 10 times.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Sample</title><p>We analyzed a dataset that was collected as part of the Sleep Deprivation and Gene Expression II pilot study (DRKS00022025). The initial sample consisted of 30 inpatients from the Central Institute of Mental Health in Mannheim, Germany, who experienced an acute depressive episode as defined in the <italic>ICD-10</italic> (<italic>International Statistical Classification of Diseases, Tenth Revision</italic>) on admission to the hospital. The final sample to be analyzed consisted of 22 (n=12, 55% male) patients aged between 18 and 63 (mean 33.5, SD 12.4, median 29, IQR 23.25-42.75) years, as the dataset of 8 patients had to be excluded completely. Specifically, 4 patients did not record any videos, 1 patient did not say anything during the recordings (23 videos), the data of 2 patients lacked sound due to technical problems (30 videos), and 1 patient was excluded because they recorded only 2 videos. 
The final sample corresponds to 18 patients with moderate depression and 4 patients with severe depression at study inclusion as assessed by clinical expert interviews using the Montgomery-&#x00C5;sberg Depression Rating Scale [<xref ref-type="bibr" rid="ref25">25</xref>]. The mean score was 28 for patients with moderate depression and 39 points for patients with severe depression. Exclusion criteria were comorbid substance use disorders and personality disorders.</p></sec><sec id="s2-2"><title>Study Procedure</title><p>Data were collected by patients on a study smartphone using the movisensXS software (movisens GmbH). The patients underwent sleep deprivation therapy as part of their depression treatment. In other words, patients had to stay awake for approximately 36 hours. Treatment effect and relapse can be measured in a matter of 4 days [<xref ref-type="bibr" rid="ref22">22</xref>], resulting in substantial within-person variance for many patients in the dataset. After at least 1 day of baseline assessment, sleep deprivation therapy was conducted on what we define as day 1 (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Specifically, patients stayed awake from 6 AM on day 1 to 6 PM on day 2. Recovery sleep was allowed from 6 PM on day 2 until 1 AM on day 3. Data were collected before, during, and after sleep deprivation therapy for up to 26 days. During the first week of this study, smartphones sent prompts three times per day (morning, afternoon, and evening); in addition, self-initiated assessments were possible to report specific events or to catch up on missed assessments. To reduce patient burden, the sampling scheme was changed to two prompts per day (morning and evening). At each prompt, patients were asked to complete items about their current affective state and to record a selfie video reporting how they currently felt. 
Patients returned the smartphone at the end of this study.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Study design.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mental_v11i1e64578_fig01.png"/></fig></sec><sec id="s2-3"><title>AA: e-Diary Ratings and Selfie Videos</title><p>The dataset contains three sets of momentary affect ratings in the form of e-diary ratings at each prompt. The full assessment tools are described in Wadle et al [<xref ref-type="bibr" rid="ref18">18</xref>]. As the analysis in this work is limited to the target variable of momentary depression, we focus here on its detailed description. Depression severity was assessed using the short version of the Allgemeine Depressionsskala (ADS-K) [<xref ref-type="bibr" rid="ref26">26</xref>]. We adapted the ADS-K to fit the characteristics of momentary assessment with 14 items on depressive mood (excluding the sleep item) rated on a scale from 0=<italic>rarely</italic> to 3=<italic>mostly</italic> (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). We recoded the reversed items so that higher scores indicated higher intensity of depressive symptoms, thereafter, we calculated mean values. In addition to the e-diary ratings just described, patients were asked to record selfie videos with the following instructions: &#x201C;Please keep the camera stable during the recording and record your whole face. Please describe in 10&#x2010;20 seconds how you currently feel.&#x201D;</p></sec><sec id="s2-4"><title>Ethical Considerations</title><p>The Ethics Committee II of the Medical Faculty Mannheim, University of Heidelberg, Germany, approved this study (2013-563N-MA). Patients were informed about the aims and study procedures. 
All patients gave informed consent and could withdraw from this study at any time.</p></sec><sec id="s2-5"><title>Data Preprocessing</title><sec id="s2-5-1"><title>Overview</title><p>Initially, the dataset contained 899 recorded selfie videos. As mentioned above, we excluded all videos of 4 patients (55 videos) and removed 2 additional videos with technical damage. We extracted audio tracks from the 842 remaining videos using the <italic>ffmpeg</italic> package in Python (Python Software Foundation) and archived them as .wav files (sampling rate=48 kHz, mono=1 channel). In the next step, we listened to all recordings and removed test runs (n=14), content-free accidental short recordings (n=29), recordings in which the microphone was covered (n=27), and assessments in which either the recording or the affective state rating was missing (n=24). Moreover, if two consecutive assessments occurred within 15 minutes of each other (n=21), the second assessment was removed unless the audio quality of the first recording was insufficient, in which case the second assessment was kept. Finally, we excluded recordings containing third-party speech (n=8) and recordings with insufficient speech intelligibility due to background noise (n=9). Prior to speech feature extraction, we filtered the remaining 710 recordings using DeepFilterNet2 [<xref ref-type="bibr" rid="ref27">27</xref>] to remove background noise.</p></sec><sec id="s2-5-2"><title>Acoustic Features</title><p>We extracted acoustic features using the functionals (version 2) of eGeMAPS [<xref ref-type="bibr" rid="ref28">28</xref>] from the open-source toolkit openSMILE implemented in Python [<xref ref-type="bibr" rid="ref24">24</xref>]. Given the limitless number of potential speech features and to increase comparability across studies, this minimalistic set of 88 acoustic features is recommended for use in clinical speech analysis [<xref ref-type="bibr" rid="ref28">28</xref>]. 
We added the parameter <italic>speech rate</italic>, which requires transcription of the recordings. We obtained the transcripts using an automatic speech recognition system according to published procedures [<xref ref-type="bibr" rid="ref29">29</xref>] and corrected the transcripts manually. To determine speech rate, we calculated the ratio of words divided by the duration of the speech sample. In our previous publication [<xref ref-type="bibr" rid="ref18">18</xref>], we included a subset of three of these speech features (top-down selected: F0semitoneFrom27.5Hz_sma3nz_stddevNorm, MeanUnvoicedSegmentLength, speech rate) in multilevel model analyses and found an association between each of them and depression severity. In the present work, however, we included all of the described 89 speech features as predictors for depression severity in our ML models.</p></sec></sec><sec id="s2-6"><title>ML Analyses</title><p>Five ML analyses were conducted to determine the optimal model for predicting ADS-K mean scores from our 89 speech features (<xref ref-type="table" rid="table1">Table 1</xref>). All analyses used consistent preprocessing, including median imputation for missing data and standard scaling for feature normalization. A variety of models were evaluated: a random chance and a dummy regressor (mean and median; results of the superior are shown) for baseline comparisons, random forest, linear regression, SVR, and XGBoost regression. 
The models were fine-tuned using nested cross-validation and a systematic grid search to optimize the hyperparameters, ensuring the robustness and reliability of our results using the PHOTON AI (Medical Machine Learning Lab Translational Psychiatry) software package [<xref ref-type="bibr" rid="ref30">30</xref>].</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Overview of train-test split scenarios.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Train-test split scenario</td><td align="left" valign="bottom">Explanation</td><td align="left" valign="bottom">Visualization</td></tr></thead><tbody><tr><td align="left" valign="top">Group 5-fold cross-validation</td><td align="left" valign="top">Separation of data points into five bins of approximately equal size, with the condition that each patient&#x2019;s data are represented in exactly one bin, that is, either in the training set or the test set, but not both. Train on all but one bin, test on the remaining bin. Repetition of the procedure until each bin has been used once as a test bin (5-fold cross-validation).</td><td align="left" valign="top"><graphic xlink:href="mental_v11i1e64578_fig02.png"/><sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Leave-one-subject-out</td><td align="left" valign="top">Train on data from all but one patient. Test on data from the one left-out patient. 
The procedure was repeated until each subject was used in the test arm (in our study N=22).</td><td align="left" valign="top"><graphic xlink:href="mental_v11i1e64578_fig03.png"/></td></tr><tr><td align="left" valign="top">Chronological split</td><td align="left" valign="top">Train on the chronologically first 50% of data, test on the last 50%.</td><td align="left" valign="top"><graphic xlink:href="mental_v11i1e64578_fig04.png"/></td></tr><tr><td align="left" valign="top">Odd-even split</td><td align="left" valign="top">Odd assessment points were assigned to the training set, even assessment points to the test set. Then the implementation of a 2-fold cross-validation.</td><td align="left" valign="top"><graphic xlink:href="mental_v11i1e64578_fig05.png"/></td></tr><tr><td align="left" valign="top">Random split</td><td align="left" valign="top">Data points were randomly assigned to either train or test sets. This was repeated ten times with a 2-fold cross-validation calculated in each repeated run.</td><td align="left" valign="top"><graphic xlink:href="mental_v11i1e64578_fig06.png"/></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>For visualizations: squares represent data bins in the first row and individual patients in the remaining rows; circles represent individual data points. P: patient.</p></fn></table-wrap-foot></table-wrap><p>Model performance was assessed quantitatively using the coefficient of determination (<italic>R</italic>&#x00B2;). This metric evaluates the proportion of variance in the dependent variable that can be explained by the independent variables, providing a clear measure of model effectiveness. It is essential for comparing different regression models in our analysis by quantifying how well each model explains the variability in the dataset. 
The performance metrics for each model and splitting technique combination were averaged to provide a comprehensive evaluation of model performance.</p><p>The calculation of the <italic>R</italic>&#x00B2; score in scikit-learn [<xref ref-type="bibr" rid="ref31">31</xref>] is executed as follows:</p><disp-formula id="E2"><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:mrow><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mfrac><mml:mrow><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">&#x00AF;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mo 
stretchy="false">)</mml:mo><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL1"/><p>where <inline-formula><mml:math id="ieqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:mstyle></mml:math></inline-formula> represents the observed values, <inline-formula><mml:math id="ieqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> represents the predicted values by the model, <inline-formula><mml:math id="ieqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> is the mean of the observed values <inline-formula><mml:math id="ieqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:mstyle></mml:math></inline-formula>, and <inline-formula><mml:math id="ieqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:mstyle></mml:math></inline-formula> is the number of observations.</p><p>We also present mean absolute error (MAE) scores which measure how close the predicted and actual values are. MAEs provide a straightforward interpretation given that they are calculated in the same units as the underlying data. Clinical relevance can be inferred.</p><p>Higher <italic>R</italic>&#x00B2; scores and lower MAE scores indicate superior model performance. <italic>P</italic> values &#x003C;.05 are considered to be statistically significant. 
Negative <italic>R</italic>&#x00B2; scores indicate poor model performance, and in such cases, the <italic>P</italic> value is not of interest.</p><sec id="s2-6-1"><title>Group 5-Fold Cross-Validation</title><p>In our first analytical approach, we used group 5-fold nested cross-validation to assess model performance. Data points were divided into five bins of approximately equal size, ensuring that each patient&#x2019;s data appeared in only one bin, either in the training set or the test set, but not both. This means that samples from a single patient were treated as a distinct group, ensuring the integrity of individual data within each validation fold. The model was trained on four bins and tested on the remaining bin. The procedure was repeated until each bin had been used as the test bin, completing the 5-fold cross-validation. This approach tested whether the predictive patterns identified could generalize from one group of patients to another by modeling the association between speech features and depression severity across multiple patients.</p></sec><sec id="s2-6-2"><title>LOSO Split</title><p>In the second approach, we used the maximum possible data in a subject-based split for the training set. That is, we used data from all but one patient in the training set with the goal of predicting data from this one unknown patient. This reflects a potential future clinical use case where a trained model is applied to a new, unknown patient. 
Thus, this analysis tests whether the identified predictive pattern generalizes to an unknown patient.</p><p>In the following three approaches, we split the data fifty-fifty by using three different splitting techniques: a chronological split, an odd-even split, and a random split.</p></sec><sec id="s2-6-3"><title>Chronological Split</title><p>In this approach, we used a chronological train-test split where the first 50% of the data (355 data points), ordered by assessment date, were used as the training set and the last 50% were used as the test set (355 data points). Note that our patients were recruited over a time period of 3 years and 2 months. This means that sometimes data were collected from only 1 patient and sometimes from 2 patients at the same time. Specifically, 13 patients of our final sample were enrolled consecutively. For 9 consecutive patients (ie, 9 pairs of patients), there is an overlap in assessment time when comparing the first assessment and the last assessment of an individual patient. Consequently, &#x201C;earlier&#x201D; patients are included in the training set, &#x201C;later&#x201D; patients only in the test set, and 3 patients in both. No cross-validation was applied, as this would indicate a prediction backward in time. This approach aimed to simulate a realistic prediction scenario by training the models on earlier assessments and testing their performance on later data points, thereby evaluating the predictive performance for future depression severity based on past assessments.</p></sec><sec id="s2-6-4"><title>Odd-Even Split</title><p>This method used a nested 2-fold cross-validation approach, in which patient-wise chronologically sorted data were alternately assigned to the training or test set based on odd and even collection points. As a result, half of the data from each patient is represented in the training set and half in the test set. 
Importantly, with this splitting mechanism, we assume that both the test and training sets are likely to contain data points from different states, namely severely depressed states and euthymic states right after the intervention. This approach has the advantage that the model is trained with both individual data from depressive and euthymic states, and it avoids having all depressive data in the training set but euthymic data only in the test set. Accordingly, this allows us to model and evaluate the predictive performance of speech features in clinical use cases. For example, predicting the severity of depression in a new depressive episode of a patient with a history of recurrent depression, who is already known by the model.</p></sec><sec id="s2-6-5"><title>Random Split</title><p>Since there is only one way to split data into training and test sets in the odd-even split, we aimed to test the replicability of these findings here. We randomly split our data into test and training sets and performed 2-fold cross-validation. There are 710 <italic>choose</italic> 355=1.612 &#x00D7; 10<sup>143</sup> ways to randomly split the data into 2 halves. With this splitting mechanism, it is possible that some data points never appear in the training set. Therefore, we repeated this random split ten times and report the mean values.</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Descriptive Results</title><p>Our final dataset consisted of 710 pairs of self-reported depressive momentary states and speech features extracted from concomitantly recorded selfie videos. Self-reported depression severity, as indicated by ADS-K responses (scale 0&#x2010;3), was on average 1.2 (SD 0.6). The intraclass correlation coefficient for the ADS-K was 0.47, indicating that 53% of the variance in momentary depression symptoms is attributable to within-person variability. 
The reliability index of the ADS-K in this study was excellent as evaluated according to McDonald &#x03C9; (0.87 within-person and 0.90 between-person). Histograms and correlation plots to illustrate our data structure are found in the <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec><sec id="s3-2"><title>ML Results</title><sec id="s3-2-1"><title>Overview</title><p>We present the performance of each of our 30 ML approaches in <xref ref-type="table" rid="table2">Table 2</xref>. All combinations of our 6 models (from top to bottom: random chance, dummy regression, random forest regression, linear regression, SVR, and XGBoost regression) and our 5 splitting mechanisms (from left to right: group 5-fold cross-validation, LOSO, chronological split, odd-even split, and random split) are included in the table. We show <italic>R</italic>&#x00B2; scores and MAE along with their <italic>P</italic> values.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Model performances.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom" colspan="10">Splitting techniques</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Group 5-fold cross-validation</td><td align="left" valign="top" colspan="2">Leave-one-subject-out</td><td align="left" valign="top" colspan="2">Chronological split</td><td align="left" valign="top" colspan="2">Odd-even-split</td><td align="left" valign="top" colspan="2">Random split</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><italic>R</italic>&#x00B2; score</td><td align="left" valign="top">MAE</td><td align="left" valign="top"><italic>R</italic>&#x00B2; score</td><td align="left" valign="top">MAE</td><td align="left" valign="top"><italic>R</italic>&#x00B2; score</td><td align="left" valign="top">MAE</td><td align="left" 
valign="top"><italic>R</italic>&#x00B2; score</td><td align="left" valign="top">MAE</td><td align="left" valign="top"><italic>R</italic>&#x00B2; score</td><td align="left" valign="top">MAE</td></tr></thead><tbody><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Random chance (<italic>P</italic> value)</td><td align="left" valign="top">&#x2013;3.306<break/>(N/A)<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="char" char="." valign="top">0.920<break/>(N/A)</td><td align="char" char="." valign="top">&#x2013;6.833<break/>(N/A)</td><td align="char" char="." valign="top">0.910<break/>(N/A)</td><td align="char" char="." valign="top">&#x2013;2.364<break/>(N/A)</td><td align="char" char="." valign="top">0.941<break/>(N/A)</td><td align="char" char="." valign="top">&#x2013;2.115(N/A)</td><td align="char" char="." valign="top">0.920(N/A)</td><td align="char" char="." valign="top">&#x2013;2.205(N/A)</td><td align="char" char="." valign="top">0.890(N/A)</td></tr><tr><td align="left" valign="top">Dummy regression (median)</td><td align="left" valign="top">&#x2013;0.289<break/>(.79)</td><td align="char" char="." valign="top">0.499<break/>(.41)</td><td align="char" char="." valign="top">&#x2013;3.624<break/>(.92)</td><td align="char" char="." valign="top">0.557<break/>(.72)</td><td align="char" char="." valign="top">&#x2013;0.107<break/>(.99)</td><td align="char" char="." valign="top">0.491<break/>(.99)</td><td align="char" char="." valign="top">&#x2013;0.001<break/>(.35)</td><td align="char" char="." valign="top">0.482<break/>(.48)</td><td align="char" char="." 
valign="top">&#x2013;0.007<break/>(.72)</td><td align="char" char="." valign="top">0.488<break/>(.84)</td></tr><tr><td align="left" valign="top">Random forest regression (<italic>P</italic> value)</td><td align="left" valign="top">&#x2013;0.102<break/>(.09)</td><td align="char" char="." valign="top">0.455<break/>(.04)</td><td align="char" char="." valign="top">&#x2013;4.392<break/>(.99)</td><td align="char" char="." valign="top">0.540<break/>(.29)</td><td align="char" char="." valign="top">&#x2013;0.213<break/>(.65)</td><td align="char" char="." valign="top">0.519<break/>(.81)</td><td align="char" char="." valign="top">0.336<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.381<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.305<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.396<break/>(&#x003C;.001)</td></tr><tr><td align="left" valign="top">Linear regression (<italic>P</italic> value)</td><td align="left" valign="top">&#x2013;25.508<break/>(.67)</td><td align="char" char="." valign="top">0.588<break/>(.50)</td><td align="char" char="." valign="top">&#x2013;37.258<break/>(.71)</td><td align="char" char="." valign="top">0.602<break/>(.31)</td><td align="char" char="." valign="top">&#x2013;0.364<break/>(.15)</td><td align="char" char="." valign="top">0.534<break/>(.18)</td><td align="char" char="." valign="top">&#x2013;0.179<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.445<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">&#x2013;0.558<break/>(.06)</td><td align="char" char="." valign="top">0.459<break/>(&#x003C;.001)</td></tr><tr><td align="left" valign="top">Support vector regression (<italic>P</italic> value)</td><td align="left" valign="top">&#x2013;0.136<break/>(.008)</td><td align="char" char="." valign="top">0.468<break/>(.004)</td><td align="char" char="." valign="top">&#x2013;4.006<break/>(.88)</td><td align="char" char="." 
valign="top">0.570<break/>(.89)</td><td align="char" char="." valign="top">&#x2013;0.106<break/>(.59)</td><td align="char" char="." valign="top">0.439<break/>(.87)</td><td align="char" char="." valign="top">0.313<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.388<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.293<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.401<break/>(&#x003C;.001)</td></tr><tr><td align="left" valign="top">XGBoost regression (<italic>P</italic> value)</td><td align="left" valign="top">&#x2013;0.093<break/>(.07)</td><td align="char" char="." valign="top">0.455<break/>(.03)</td><td align="char" char="." valign="top">&#x2013;3.568<break/>(.41)</td><td align="char" char="." valign="top">0.550<break/>(.03)</td><td align="char" char="." valign="top">0.084<break/>(.98)</td><td align="char" char="." valign="top">0.442<break/>(.99)</td><td align="char" char="." valign="top">0.339<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.380<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.289<break/>(&#x003C;.001)</td><td align="char" char="." valign="top">0.399<break/>(&#x003C;.001)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>N/A: not available.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2-2"><title>Group 5-Fold Cross-Validation</title><p>In our initial analysis using group 5-fold cross-validation, all tested regressors yielded negative <italic>R</italic><sup>2</sup> scores and failed to reach a performance above chance level (<xref ref-type="table" rid="table2">Table 2</xref>). This indicates that none of the models were able to significantly explain the variance of the target variable and thus failed to provide reliable predictive insights for the ADS-K mean scores in this specific setup. The models were not suitable for the dataset under the group 5-fold cross-validation scheme. 
This finding necessitates a reconsideration of the model parameters, feature selection, or possibly the experimental design to improve predictive performance.</p></sec><sec id="s3-2-3"><title>LOSO Results</title><p>The LOSO approach yielded comparable results. All models tested yielded nonsignificant negative <italic>R</italic><sup>2</sup> scores (<xref ref-type="table" rid="table2">Table 2</xref>). This indicates that none of the models effectively explained the variance of the target variable and all models were unable to predict the mean of the ADS-K scores for an unknown patient in this particular setup.</p></sec><sec id="s3-2-4"><title>Chronological Split</title><p>In the chronological split analysis, none of the models achieved statistically significant results (<xref ref-type="table" rid="table2">Table 2</xref>). These results suggest that none of the models evaluated were effective in explaining the variance in the ADS-K mean scores or providing reliable predictions in this setup.</p></sec><sec id="s3-2-5"><title>Odd-Even Split</title><p>Overall, the performance of three models tested was above chance level (<xref ref-type="table" rid="table2">Table 2</xref>). The XGBoost regression emerged as the superior performer, achieving an <italic>R</italic>&#x00B2; score of 0.339 and an MAE of 0.38 (both <italic>P</italic>&#x003C;.001). These results indicate that approximately 33.9% of the variance in the ADS-K mean scores can be explained by the speech features using this model. The MAE indicates that the mean difference between the predicted and the actual scores is 0.38 units on the ADS-K depression severity scale ranging from 0 to 3. This substantial improvement in model performance of the superior model in this approach compared to our previous ML approaches demonstrates the potential effectiveness of the XGBoost model when data are alternately assigned to training and test sets based on odd and even collection points. 
This analysis highlights the importance of including both depressive and euthymic data points from the same individual in both the training and test set. In addition to the XGBoost model, the SVR and random forest regression yielded statistically significant results of a descriptively comparable order of magnitude.</p></sec><sec id="s3-2-6"><title>Random Split</title><p>The random forest regression emerged as the superior performer (<xref ref-type="table" rid="table2">Table 2</xref>) in the random split. The model achieved an <italic>R</italic>&#x00B2; score of 0.305 and an MAE of 0.396 (both <italic>P</italic>&#x003C;.001). These results indicate that using this model, approximately 30.5% of the variance in the ADS-K mean scores can be explained by the speech features. The MAE of the random forest model indicates that the mean difference between the predicted and the actual scores is 0.396 units on the ADS-K depression severity scale ranging from 0&#x2010;3. In addition to the random forest regression, the SVR and XGBoost regression models reached statistical significance with descriptively comparable performance.</p></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The objective of this study was to evaluate if speech-based multiparameter ML models and specific train-test splits would significantly increase the prediction of depression severity ratings compared to previous statistical analyses. Uniquely, we used a longitudinal dataset of patients with MDD undergoing sleep deprivation therapy. This approach allows the observation of treatment onset and relapse within a few days, thereby allowing for a maximum of within-person variance of momentary depressive states in our dataset. The most effective ML model (XGBoost regression with odd-even splitting) explains 33.9% of the variance of the target variable depression severity with an MAE of 0.38. 
It is noteworthy that this represents a 17-fold increase in predictive power over our previous analyses of this (same) dataset, which revealed an <italic>R</italic>&#x00B2;<sub>Hox</sub> of 2% [<xref ref-type="bibr" rid="ref18">18</xref>]. It should be noted that in our previous analysis we focused on a subset of 3 speech features, whereas in this work 89 speech features were included in the models. Furthermore, in our previous work, we used inferential statistics in the form of multilevel models, whereas here we used ML. The present results suggest that integrating a larger number of speech features and allowing for more complex modeling can significantly improve prediction performance. However, these findings need to be replicated in a different sample.</p><p>Moreover, our findings revealed that several models reached statistical significance, but with varying predictive power. In short, models in which both the training and the test set contained data from the same patients were successful in predicting depression severity based on speech features (odd-even split and random split). In contrast, all of our models which were tested on data from patients for whom the model was na&#x00EF;ve, failed (chronological split, 5-fold cross-validation, and LOSO). Interestingly, for both the odd-even and the random split, three ML models (random forest, SVR, and XGBoost) achieved statistical significance, with an <italic>R</italic>&#x00B2; and MAE of descriptively comparable size. This suggests that these two approaches perform similarly and it is probably not critical which one is ultimately chosen. However, this conclusion must be taken with caution as we did not test the models against each other as this would require orders of magnitude more computational power than all the analyses combined here.</p><p>As noted above, all models trying to predict depression scores only of patients for whom the model was na&#x00EF;ve, failed. 
This finding suggests that the predictive patterns do not appear to generalize across patients. This indicates that ML models need to be fine-tuned to the specific patient about whom predictions are to be made. This is consistent with previous research indicating better predictive performance for personalized models compared to generalized models [<xref ref-type="bibr" rid="ref20">20</xref>]. It underscores the importance of longitudinal datasets, which are still scarce. Only when multiple data points per patient are available for training purposes, that is, longitudinal data, can prediction reach a sufficient level.</p><p>In this context, the heterogeneity of the clinical picture of MDD must also be taken into account. Widely used diagnostic criteria allow for more than 400 possible combinations of symptoms [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. This might explain why there is no one-size-fits-all approach, that is, associations from one patient cannot be easily transferred to another patient. In future work, it might be interesting to test whether models trained and tested on different patients, but with a similar clinical picture, would perform better. For example, a model trained on patients whose clinical picture is strongly characterized by having low energy might be transferable to patients with similar characteristics, but not to patients with a high degree of hyperarousal.</p></sec><sec id="s4-2"><title>Limitations</title><p>Although our study demonstrates the potential use of speech features in clinical monitoring, particularly of patients with recurrent MDD, some limitations must be mentioned. First, our sample size is relatively small. However, we believe that a unique strength of our dataset is the inclusion of patients with an acute clinical diagnosis of a depressive episode requiring an inpatient stay (rather than subclinical study participants), and the true within-person design. 
Additionally, due to our longitudinal intervention design, we do have a relatively high number of data points per patient and a meaningful amount of variance in our target variable. Future studies are needed to test the replicability of our findings. Second, although eGeMAPS is a standardized set of speech features recommended for clinical use cases, it may not capture all relevant speech characteristics associated with depression. Nevertheless, we prefer to use predefined feature sets suggested by the community rather than creating our own features to increase the comparability across studies. In light of the previous two arguments, pooling of datasets will become very important in the future, another argument for relying on well-known feature sets. Third, we limited our analyses to 5 different splitting techniques, for each of which we trained over 500 ML models. Nowadays, computational power would allow us to run huge amounts of ML models [<xref ref-type="bibr" rid="ref34">34</xref>]. However, even with our small set of ML variants, we were still able to demonstrate the importance of individualized ML models with well-designed splitting mechanisms.</p></sec><sec id="s4-3"><title>Future Directions</title><p>Although we did not test personalized ML models per se in this work, our results support the idea that personalized state-of-the-art approaches, that is, individual ML models, are the most promising [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. A prerequisite for this is the collection of sufficient data points per person in a first step. Importantly, there must be sufficient within-person variance in illness states during this so-called burn-in phase [<xref ref-type="bibr" rid="ref36">36</xref>]. Once a sufficient amount of data of this patient is available, a first model could be trained. 
As new data is coming in permanently, the model can be constantly updated with the individual&#x2019;s data, thus continuously improving its performance. Another idea is to start with a generalized or semipersonalized model (eg, trained on same-sex data) to avoid the cold start problem [<xref ref-type="bibr" rid="ref36">36</xref>]. Incoming data from the patient could be used to fine-tune the model. This is certainly a complex endeavor that requires patience and perseverance on the part of the patients, but might be worth it once a sufficiently functional model is established. In the long term, this could be particularly helpful for patients with a history of recurrent MDD. To test the feasibility of this, longitudinal studies over even longer time periods than those of the few that already exist are needed.</p><p>Moreover, to reduce patient burden, it is even more attractive to use behavioral features that patients do not have to actively collect, such as speech. Since we carry our smartphones with us most of the time anyway, and most people speak naturally in their everyday lives, these features seem promising. However, there are still many ethical and privacy questions with regard to the specific category of speech data. For example, speaker identification algorithms are needed that work reliably, on the fly, and in everyday environments (including varying background noise) to ensure that only the target&#x2019;s speech is analyzed.</p></sec><sec id="s4-4"><title>Conclusion</title><p>Our study contributes to the emerging field of digital behavioral markers as indicators of mental health by highlighting the potential and challenges of using speech features to monitor depression. While our results suggest that speech features might be useful in predicting momentary depression severity, future research is needed to evaluate whether these findings can be replicated. 
Ultimately, speech-based depression monitoring systems could significantly improve patient care in the future.</p></sec></sec></body><back><ack><p>This paper was funded by the German Research Foundation/Deutsche Forschungsgemeinschaft (DFG; GRK2739/1 "Research Training Group: KD&#x00B2;School&#x2014;Designing Adaptive Systems for Economic Decisions" [447089431]). This work was supported in part by grants from the DFG (TRR 265 &#x201C;Losing and Regaining Control over Drug intake&#x201D; [402170461], Collaborative Research Center 393 &#x201C;Trajectories of affective disorders: cognitive-emotional mechanisms of symptom change&#x201D; [521379614]), and the Federal Ministry of Research and Education (BMBF; RELATER 01EF1803B "Improving communication in psychiatric care of refugees using mobile technology"), and the European Research Area Network ("Neuron EMBED: Early Life Metabolic and Psychosocial Stress on Susceptibility to Mental Disorders" [01EW1904]).</p></ack><fn-group><fn fn-type="con"><p>MR, JCF, JF, SHW, LS, MG, FS, and UWE-P planned the investigation and developed the sampling scheme. MG and LS were responsible for data collection. LMH preprocessed the data, interpreted results, drafted and finalized this paper. DE and TH ran ML analyses. UWE-P, DE, TH, and SES contributed to the interpretation of results and UWE-P and DE to drafting this paper. All authors revised and edited this paper critically and had final approval of the version to be published.</p></fn><fn fn-type="conflict"><p>UWE-P reports consultancy for Boehringer-Ingelheim and speaker honorarium from Angelini Pharma, which both had no influence on the content of this article. 
All other authors declare no conflict of interest.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AA</term><def><p>ambulatory assessment</p></def></def-item><def-item><term id="abb2">ADS-K</term><def><p>Allgemeine Depressionsskala</p></def></def-item><def-item><term id="abb3">eGeMAPS</term><def><p>Extended Geneva Minimalistic Acoustic Parameter Set</p></def></def-item><def-item><term id="abb4"><italic>ICD-10</italic></term><def><p><italic>International Statistical Classification of Diseases, Tenth Revision</italic></p></def></def-item><def-item><term id="abb5">LOSO</term><def><p>leave-one-subject-out</p></def></def-item><def-item><term id="abb6">MAE</term><def><p>mean absolute error</p></def></def-item><def-item><term id="abb7">MDD</term><def><p>major depressive disorder</p></def></def-item><def-item><term id="abb8">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb9">openSMILE</term><def><p>Open-Source Speech and Music Interpretation by Large-Space Extraction</p></def></def-item><def-item><term id="abb10">RADAR&#x2013;CNS</term><def><p>Remote Assessment of Disease and Relapse&#x2013;Central Nervous System</p></def></def-item><def-item><term id="abb11">SVR</term><def><p>support vector regression</p></def></def-item><def-item><term id="abb12">XGBoost</term><def><p>Extreme Gradient Boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vos</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lim</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Abbafati</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Global burden of 369 diseases and injuries in 204 countries and territories, 1990&#x2013;2019: a systematic analysis for the Global 
Burden of Disease Study 2019</article-title><source>The Lancet</source><year>2020</year><month>10</month><volume>396</volume><issue>10258</issue><fpage>1204</fpage><lpage>1222</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(20)30925-9</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Benasi</surname><given-names>G</given-names> </name><name name-style="western"><surname>Fava</surname><given-names>GA</given-names> </name><name name-style="western"><surname>Guidi</surname><given-names>J</given-names> </name></person-group><article-title>Prodromal symptoms in depression: a systematic review</article-title><source>Psychother Psychosom</source><year>2021</year><volume>90</volume><issue>6</issue><fpage>365</fpage><lpage>372</lpage><pub-id pub-id-type="doi">10.1159/000517953</pub-id><pub-id pub-id-type="medline">34350890</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Colombo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Fern&#x00E1;ndez-&#x00C1;lvarez</surname><given-names>J</given-names> </name><name name-style="western"><surname>Patan&#x00E9;</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Current state and future directions of technology-based ecological momentary assessment and intervention for major depressive disorder: a systematic review</article-title><source>J Clin Med</source><year>2019</year><month>04</month><day>5</day><volume>8</volume><issue>4</issue><fpage>465</fpage><pub-id pub-id-type="doi">10.3390/jcm8040465</pub-id><pub-id pub-id-type="medline">30959828</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Ebrahimi</surname><given-names>OV</given-names> </name><name name-style="western"><surname>Burger</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hoffart</surname><given-names>A</given-names> </name><name name-style="western"><surname>Johnson</surname><given-names>SU</given-names> </name></person-group><article-title>Within- and across-day patterns of interplay between depressive symptoms and related psychopathological processes: a dynamic network approach during the COVID-19 pandemic</article-title><source>BMC Med</source><year>2021</year><month>11</month><day>30</day><volume>19</volume><issue>1</issue><fpage>317</fpage><pub-id pub-id-type="doi">10.1186/s12916-021-02179-y</pub-id><pub-id pub-id-type="medline">34844588</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fried</surname><given-names>EI</given-names> </name><name name-style="western"><surname>Flake</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Robinaugh</surname><given-names>DJ</given-names> </name></person-group><article-title>Revisiting the theoretical and methodological foundations of depression measurement</article-title><source>Nat Rev Psychol</source><year>2022</year><month>06</month><volume>1</volume><issue>6</issue><fpage>358</fpage><lpage>368</lpage><pub-id pub-id-type="doi">10.1038/s44159-022-00050-2</pub-id><pub-id pub-id-type="medline">38107751</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abd-Alrazaq</surname><given-names>A</given-names> </name><name name-style="western"><surname>AlSaad</surname><given-names>R</given-names> </name><name name-style="western"><surname>Aziz</surname><given-names>S</given-names> 
</name><etal/></person-group><article-title>Wearable artificial intelligence for anxiety and depression: scoping review</article-title><source>J Med Internet Res</source><year>2023</year><month>01</month><day>19</day><volume>25</volume><fpage>e42672</fpage><pub-id pub-id-type="doi">10.2196/42672</pub-id><pub-id pub-id-type="medline">36656625</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Torous</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kiang</surname><given-names>MV</given-names> </name><name name-style="western"><surname>Lorme</surname><given-names>J</given-names> </name><name name-style="western"><surname>Onnela</surname><given-names>JP</given-names> </name></person-group><article-title>New tools for new research in psychiatry: a scalable and customizable platform to empower data driven smartphone research</article-title><source>JMIR Ment Health</source><year>2016</year><month>05</month><day>5</day><volume>3</volume><issue>2</issue><fpage>e16</fpage><pub-id pub-id-type="doi">10.2196/mental.5165</pub-id><pub-id pub-id-type="medline">27150677</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ebner-Priemer</surname><given-names>UW</given-names> </name><name name-style="western"><surname>M&#x00FC;hlbauer</surname><given-names>E</given-names> </name><name name-style="western"><surname>Neubauer</surname><given-names>AB</given-names> </name><etal/></person-group><article-title>Digital phenotyping: towards replicable findings with comprehensive assessments and integrative models in bipolar disorders</article-title><source>Int J Bipolar Disord</source><year>2020</year><month>11</month><day>17</day><volume>8</volume><issue>1</issue><fpage>35</fpage><pub-id 
pub-id-type="doi">10.1186/s40345-020-00210-4</pub-id><pub-id pub-id-type="medline">33211262</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Trull</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Ebner-Priemer</surname><given-names>U</given-names> </name></person-group><article-title>The role of ambulatory assessment in psychological science</article-title><source>Curr Dir Psychol Sci</source><year>2014</year><month>12</month><volume>23</volume><issue>6</issue><fpage>466</fpage><lpage>470</lpage><pub-id pub-id-type="doi">10.1177/0963721414550706</pub-id><pub-id pub-id-type="medline">25530686</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ebner-Priemer</surname><given-names>UW</given-names> </name><name name-style="western"><surname>Trull</surname><given-names>TJ</given-names> </name></person-group><article-title>Ecological momentary assessment of mood disorders and mood dysregulation</article-title><source>Psychol Assess</source><year>2009</year><month>12</month><volume>21</volume><issue>4</issue><fpage>463</fpage><lpage>475</lpage><pub-id pub-id-type="doi">10.1037/a0017075</pub-id><pub-id pub-id-type="medline">19947781</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rimti</surname><given-names>FH</given-names> </name><name name-style="western"><surname>Shahbaz</surname><given-names>R</given-names> </name><name name-style="western"><surname>Bhatt</surname><given-names>K</given-names> </name><name name-style="western"><surname>Xiang</surname><given-names>A</given-names> </name></person-group><article-title>A review of new insights into existing major 
depressive disorder biomarkers</article-title><source>Heliyon</source><year>2023</year><month>08</month><volume>9</volume><issue>8</issue><fpage>e18909</fpage><pub-id pub-id-type="doi">10.1016/j.heliyon.2023.e18909</pub-id><pub-id pub-id-type="medline">37664743</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>De Angel</surname><given-names>V</given-names> </name><name name-style="western"><surname>Lewis</surname><given-names>S</given-names> </name><name name-style="western"><surname>White</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Digital health tools for the passive monitoring of depression: a systematic review of methods</article-title><source>NPJ Digit Med</source><year>2022</year><month>01</month><day>11</day><volume>5</volume><issue>1</issue><fpage>3</fpage><pub-id pub-id-type="doi">10.1038/s41746-021-00548-8</pub-id><pub-id pub-id-type="medline">35017634</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zarate</surname><given-names>D</given-names> </name><name name-style="western"><surname>Stavropoulos</surname><given-names>V</given-names> </name><name name-style="western"><surname>Ball</surname><given-names>M</given-names> </name><name name-style="western"><surname>de Sena Collier</surname><given-names>G</given-names> </name><name name-style="western"><surname>Jacobson</surname><given-names>NC</given-names> </name></person-group><article-title>Exploring the digital footprint of depression: a PRISMA systematic literature review of the empirical evidence</article-title><source>BMC Psychiatry</source><year>2022</year><month>06</month><day>22</day><volume>22</volume><issue>1</issue><fpage>421</fpage><pub-id pub-id-type="doi">10.1186/s12888-022-04013-y</pub-id><pub-id 
pub-id-type="medline">35733121</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Low</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Bentley</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Ghosh</surname><given-names>SS</given-names> </name></person-group><article-title>Automated assessment of psychiatric disorders using speech: a systematic review</article-title><source>Laryngoscope Investig Otolaryngol</source><year>2020</year><month>02</month><volume>5</volume><issue>1</issue><fpage>96</fpage><lpage>116</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://onlinelibrary.wiley.com/toc/23788038/5/1">https://onlinelibrary.wiley.com/toc/23788038/5/1</ext-link></comment><pub-id pub-id-type="doi">10.1002/lio2.354</pub-id><pub-id pub-id-type="medline">32128436</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kraepelin</surname><given-names>E</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Robertson</surname><given-names>RM</given-names> </name></person-group><source>Manic-Depressive Insanity and Paranoia</source><year>1921</year><publisher-name>E&#x0026;S Livingstone</publisher-name><pub-id pub-id-type="doi">10.1192/bjp.67.278.342</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cummins</surname><given-names>N</given-names> </name><name name-style="western"><surname>Scherer</surname><given-names>S</given-names> </name><name name-style="western"><surname>Krajewski</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Schnieder</surname><given-names>S</given-names> </name><name name-style="western"><surname>Epps</surname><given-names>J</given-names> </name><name name-style="western"><surname>Quatieri</surname><given-names>TF</given-names> </name></person-group><article-title>A review of depression and suicide risk assessment using speech analysis</article-title><source>Speech Commun</source><year>2015</year><month>07</month><volume>71</volume><fpage>10</fpage><lpage>49</lpage><pub-id pub-id-type="doi">10.1016/j.specom.2015.03.004</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wadle</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Ebner-Priemer</surname><given-names>UW</given-names> </name></person-group><article-title>Smart digital phenotyping</article-title><source>Eur Neuropsychopharmacol</source><year>2023</year><month>11</month><volume>76</volume><fpage>1</fpage><lpage>2</lpage><pub-id pub-id-type="doi">10.1016/j.euroneuro.2023.07.002</pub-id><pub-id pub-id-type="medline">37451161</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wadle</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Ebner-Priemer</surname><given-names>UW</given-names> </name><name name-style="western"><surname>Foo</surname><given-names>JC</given-names> </name><etal/></person-group><article-title>Speech features as predictors of momentary depression severity in patients with depressive disorder undergoing sleep deprivation therapy: ambulatory assessment pilot study</article-title><source>JMIR Ment Health</source><year>2024</year><month>01</month><day>18</day><volume>11</volume><fpage>e49222</fpage><pub-id pub-id-type="doi">10.2196/49222</pub-id><pub-id 
pub-id-type="medline">38236637</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Gerczuk</surname><given-names>M</given-names> </name><name name-style="western"><surname>Triantafyllopoulos</surname><given-names>A</given-names> </name><name name-style="western"><surname>Amiriparian</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Personalised deep learning for monitoring depressed mood from speech</article-title><conf-name>2022 E-Health and Bioengineering Conference (EHB)</conf-name><conf-loc>Iasi, Romania</conf-loc><fpage>1</fpage><lpage>5</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://ieeexplore.ieee.org/abstract/document/9991737/">https://ieeexplore.ieee.org/abstract/document/9991737/</ext-link></comment><pub-id pub-id-type="doi">10.1109/EHB55594.2022.9991737</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Campbell</surname><given-names>EL</given-names> </name><name name-style="western"><surname>Dineley</surname><given-names>J</given-names> </name><name name-style="western"><surname>Conde</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Classifying depression symptom severity: assessment of speech representations in personalized and generalized machine learning models</article-title><conf-name>INTERSPEECH 2023</conf-name><fpage>1738</fpage><lpage>1742</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2023/">https://www.isca-archive.org/interspeech_2023/</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2023-1721</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Cummins</surname><given-names>N</given-names> </name><name name-style="western"><surname>Dineley</surname><given-names>J</given-names> </name><name name-style="western"><surname>Conde</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Multilingual markers of depression in remotely collected speech samples: a preliminary analysis</article-title><source>J Affect Disord</source><year>2023</year><month>11</month><day>15</day><volume>341</volume><fpage>128</fpage><lpage>136</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2023.08.097</pub-id><pub-id pub-id-type="medline">37598722</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wirz-Justice</surname><given-names>A</given-names> </name><name name-style="western"><surname>Benedetti</surname><given-names>F</given-names> </name></person-group><article-title>Perspectives in affective disorders: clocks and sleep</article-title><source>Eur J Neurosci</source><year>2020</year><month>01</month><volume>51</volume><issue>1</issue><fpage>346</fpage><lpage>365</lpage><pub-id pub-id-type="doi">10.1111/ejn.14362</pub-id><pub-id pub-id-type="medline">30702783</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Matcham</surname><given-names>F</given-names> </name><name name-style="western"><surname>Barattieri di San Pietro</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bulgari</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Remote assessment of disease and relapse in major depressive disorder (RADAR-MDD): a multi-centre prospective cohort study protocol</article-title><source>BMC 
Psychiatry</source><year>2019</year><month>02</month><day>18</day><volume>19</volume><issue>1</issue><fpage>72</fpage><pub-id pub-id-type="doi">10.1186/s12888-019-2049-z</pub-id><pub-id pub-id-type="medline">30777041</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Eyben</surname><given-names>F</given-names> </name><name name-style="western"><surname>W&#x00F6;llmer</surname><given-names>M</given-names> </name><name name-style="western"><surname>Schuller</surname><given-names>B</given-names> </name></person-group><article-title>Opensmile: the munich versatile and fast open-source audio feature extractor</article-title><year>2010</year><conf-name>Proceedings of the 18th ACM International Conference on Multimedia</conf-name><conf-date>Oct 25-29, 2010</conf-date><conf-loc>Firenze, Italy</conf-loc><fpage>1459</fpage><lpage>1462</lpage><pub-id pub-id-type="doi">10.1145/1873951.1874246</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Montgomery</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Asberg</surname><given-names>M</given-names> </name></person-group><article-title>A new depression scale designed to be sensitive to change</article-title><source>Br J Psychiatry</source><year>1979</year><month>04</month><volume>134</volume><issue>4</issue><fpage>382</fpage><lpage>389</lpage><pub-id pub-id-type="doi">10.1192/bjp.134.4.382</pub-id><pub-id pub-id-type="medline">444788</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hautzinger</surname><given-names>M</given-names> </name></person-group><article-title>Ein Depressionsmessinstrument f&#x00FC;r Untersuchungen in 
der Allgemeinbev&#x00F6;lkerung</article-title><source>Diagnostica</source><year>1988</year><volume>34</volume><fpage>167</fpage><lpage>173</lpage></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Schr&#x00F6;ter</surname><given-names>H</given-names> </name><name name-style="western"><surname>Maier</surname><given-names>A</given-names> </name><name name-style="western"><surname>Escalante-B</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Rosenkranz</surname><given-names>T</given-names> </name></person-group><article-title>Deepfilternet2: towards real-time speech enhancement on embedded devices for full-band audio</article-title><conf-name>2022 International Workshop on Acoustic Signal Enhancement (IWAENC)</conf-name><conf-loc>Bamberg, Germany</conf-loc><pub-id pub-id-type="doi">10.1109/IWAENC53105.2022.9914782</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eyben</surname><given-names>F</given-names> </name><name name-style="western"><surname>Scherer</surname><given-names>KR</given-names> </name><name name-style="western"><surname>Schuller</surname><given-names>BW</given-names> </name><etal/></person-group><article-title>The Geneva Minimalistic Acoustic Parameter Set (GeMAPS) for voice research and affective computing</article-title><source>IEEE Trans Affect Comput</source><year>2016</year><volume>7</volume><issue>2</issue><fpage>190</fpage><lpage>202</lpage><pub-id pub-id-type="doi">10.1109/TAFFC.2015.2457417</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Abulimiti</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Weiner</surname><given-names>J</given-names> </name><name name-style="western"><surname>Schultz</surname><given-names>T</given-names> </name></person-group><article-title>Automatic speech recognition for ILSE-interviews: longitudinal conversational speech recordings covering aging and cognitive decline</article-title><year>2020</year><conf-name>Interspeech 2020</conf-name><conf-date>Oct 25-29, 2020</conf-date><conf-loc>Shanghai, China</conf-loc><fpage>3799</fpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2020/">https://www.isca-archive.org/interspeech_2020/</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2020-2829</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leenings</surname><given-names>R</given-names> </name><name name-style="western"><surname>Winter</surname><given-names>NR</given-names> </name><name name-style="western"><surname>Plagwitz</surname><given-names>L</given-names> </name><etal/></person-group><article-title>PHOTONAI-A Python API for rapid machine learning model development</article-title><source>PLoS One</source><year>2021</year><volume>16</volume><issue>7</issue><fpage>e0254062</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0254062</pub-id><pub-id pub-id-type="medline">34288935</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>Scikit-learn</article-title><access-date>2024-11-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://scikit-learn.org/stable/modules/model_evaluation.html#r2-score">https://scikit-learn.org/stable/modules/model_evaluation.html#r2-score</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Goldberg</surname><given-names>D</given-names> </name></person-group><article-title>The heterogeneity of &#x201C;major depression.&#x201D;</article-title><source>World Psychiatry</source><year>2011</year><month>10</month><volume>10</volume><issue>3</issue><fpage>226</fpage><lpage>228</lpage><pub-id pub-id-type="doi">10.1002/j.2051-5545.2011.tb00061.x</pub-id><pub-id pub-id-type="medline">21991283</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x00D8;stergaard</surname><given-names>SD</given-names> </name><name name-style="western"><surname>Jensen</surname><given-names>SOW</given-names> </name><name name-style="western"><surname>Bech</surname><given-names>P</given-names> </name></person-group><article-title>The heterogeneity of the depressive syndrome: when numbers get serious</article-title><source>Acta Psychiatr Scand</source><year>2011</year><month>12</month><volume>124</volume><issue>6</issue><fpage>495</fpage><lpage>496</lpage><pub-id pub-id-type="doi">10.1111/j.1600-0447.2011.01744.x</pub-id><pub-id pub-id-type="medline">21838736</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Winter</surname><given-names>NR</given-names> </name><name name-style="western"><surname>Blanke</surname><given-names>J</given-names> </name><name name-style="western"><surname>Leenings</surname><given-names>R</given-names> </name><etal/></person-group><article-title>A systematic evaluation of machine learning-based biomarkers for major depressive disorder</article-title><source>JAMA Psychiatry</source><year>2024</year><month>04</month><day>1</day><volume>81</volume><issue>4</issue><fpage>386</fpage><lpage>395</lpage><pub-id pub-id-type="doi">10.1001/jamapsychiatry.2023.5083</pub-id><pub-id 
pub-id-type="medline">38198165</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>W&#x00F6;rtwein</surname><given-names>T</given-names> </name><name name-style="western"><surname>Allen</surname><given-names>NB</given-names> </name><name name-style="western"><surname>Sheeber</surname><given-names>LB</given-names> </name><name name-style="western"><surname>Auerbach</surname><given-names>RP</given-names> </name><name name-style="western"><surname>Cohn</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Morency</surname><given-names>LP</given-names> </name></person-group><article-title>Neural mixed effects for nonlinear personalized predictions</article-title><year>2023</year><conf-name>ICMI &#x2019;23</conf-name><conf-date>Oct 9-13, 2023</conf-date><conf-loc>Paris France</conf-loc><fpage>445</fpage><lpage>454</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/proceedings/10.1145/3577190">https://dl.acm.org/doi/proceedings/10.1145/3577190</ext-link></comment><pub-id pub-id-type="doi">10.1145/3577190.3614115</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kathan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Harrer</surname><given-names>M</given-names> </name><name name-style="western"><surname>K&#x00FC;ster</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Personalised depression forecasting using mobile sensor data and ecological momentary assessment</article-title><source>Front Digit Health</source><year>2022</year><volume>4</volume><fpage>964582</fpage><pub-id pub-id-type="doi">10.3389/fdgth.2022.964582</pub-id><pub-id 
pub-id-type="medline">36465087</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>ADS-K Items. ADS-K: Allgemeine Depressionsskala.</p><media xlink:href="mental_v11i1e64578_app1.pdf" xlink:title="PDF File, 31 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Data visualizations.</p><media xlink:href="mental_v11i1e64578_app2.pdf" xlink:title="PDF File, 3427 KB"/></supplementary-material></app-group></back></article>