@Article{info:doi/10.2196/67624, author="Coren, A. Morgan and Lindhiem, Oliver and Angus, R. Abby and Toevs, K. Emma and Radovic, Ana", title="Provider Perspectives on Implementing an Enhanced Digital Screening for Adolescent Depression and Suicidality: Qualitative Study", journal="JMIR Form Res", year="2025", month="Apr", day="10", volume="9", pages="e67624", keywords="depression", keywords="suicidality", keywords="adolescent mental health screening", keywords="primary care", keywords="digital tools", abstract="Background: With a growing adolescent mental health crisis, pediatric societies are increasingly recommending that primary care providers (PCPs) engage in mental health screening. While symptom-level screens identify symptoms, novel technology interventions can assist PCPs with providing additional point-of-care guidance to increase uptake for behavioral health services. Objective: In this study, we sought community PCP feedback on a web-based, digitally enhanced mental health screening tool for adolescents in primary care previously only evaluated in research studies to inform implementation in community settings. Methods: A total of 10 adolescent providers were recruited to trial the new screening tool and participate in structured interviews based on the Consolidated Framework for Implementation Research domains. Interviews were audio recorded, transcribed, and coded according to a prespecified codebook using a template analysis approach. Results: Providers identified improving mental health screening and treatment in pediatric primary care as a priority and agreed that a web-based digitally enhanced screening tool could help facilitate identification of and management of adolescent depression. 
Salient barriers identified were lack of electronic health record integration, time to administer screening, implications on clinic workflow, accessibility, and lack of transparency within health care organizations about the process of approving new technologies for clinical use. Providers made multiple suggestions to enhance implementation in community settings, such as incorporating customization options. Conclusions: Technology interventions can help address the need for improved behavioral health support in primary care settings. However, numerous barriers exist, complicating implementation of new technologies in real-world settings. ", doi="10.2196/67624", url="https://formative.jmir.org/2025/1/e67624" } @Article{info:doi/10.2196/63105, author="Templeton, Michael John and Poellabauer, Christian and Schneider, Sandra and Rahimi, Morteza and Braimoh, Taofeek and Tadamarry, Fhaheem and Margolesky, Jason and Burke, Shanna and Al Masry, Zeina", title="Modernizing the Staging of Parkinson Disease Using Digital Health Technology", journal="J Med Internet Res", year="2025", month="Apr", day="4", volume="27", pages="e63105", keywords="digital health", keywords="Parkinson disease", keywords="disease classification", keywords="wearables", keywords="personalized medicine", keywords="neurocognition", keywords="artificial intelligence", keywords="AI", doi="10.2196/63105", url="https://www.jmir.org/2025/1/e63105" } @Article{info:doi/10.2196/65178, author="West, Matthew and Cheng, You and He, Yingnan and Leng, Yu and Magdamo, Colin and Hyman, T. Bradley and Dickson, R. 
John and Serrano-Pozo, Alberto and Blacker, Deborah and Das, Sudeshna", title="Unsupervised Deep Learning of Electronic Health Records to Characterize Heterogeneity Across Alzheimer Disease and Related Dementias: Cross-Sectional Study", journal="JMIR Aging", year="2025", month="Mar", day="31", volume="8", pages="e65178", keywords="Alzheimer disease and related dementias", keywords="electronic health records", keywords="large language models", keywords="clustering", keywords="unsupervised learning", abstract="Background: Alzheimer disease and related dementias (ADRD) exhibit prominent heterogeneity. Identifying clinically meaningful ADRD subtypes is essential for tailoring treatments to specific patient phenotypes. Objective: We aimed to use unsupervised learning techniques on electronic health records (EHRs) from memory clinic patients to identify ADRD subtypes. Methods: We used pretrained embeddings of non-ADRD diagnosis codes (International Classification of Diseases, Ninth Revision) and large language model (LLM)--derived embeddings of clinical notes from patient EHRs. Hierarchical clustering of these embeddings was used to identify ADRD subtypes. Clusters were characterized regarding their demographic and clinical features. Results: We analyzed a cohort of 3454 patients with ADRD from a memory clinic at Massachusetts General Hospital, each with a specialist diagnosis. Clustering pretrained embeddings of the non-ADRD diagnosis codes in patient EHRs revealed the following 3 patient subtypes: one with skin conditions, another with psychiatric disorders and an earlier age of onset, and a third with diabetes complications. 
Similarly, using LLM-derived embeddings of clinical notes, we identified 3 subtypes of patients as follows: one with psychiatric manifestations and higher prevalence of female participants (prevalence ratio: 1.59), another with cardiovascular and motor problems and higher prevalence of male participants (prevalence ratio: 1.75), and a third one with geriatric health disorders. Notably, we observed significant overlap between clusters from both data modalities ($\chi^2_4$=89.4; P<.001). Conclusions: By integrating International Classification of Diseases, Ninth Revision codes and LLM-derived embeddings, our analysis delineated 2 distinct ADRD subtypes with sex-specific comorbid and clinical presentations, offering insights for potential precision medicine approaches. ", doi="10.2196/65178", url="https://aging.jmir.org/2025/1/e65178" } @Article{info:doi/10.2196/57986, author="Schnepper, Rebekka and Roemmel, Noa and Schaefert, Rainer and Lambrecht-Walzinger, Lena and Meinlschmidt, Gunther", title="Exploring Biases of Large Language Models in the Field of Mental Health: Comparative Questionnaire Study of the Effect of Gender and Sexual Orientation in Anorexia Nervosa and Bulimia Nervosa Case Vignettes", journal="JMIR Ment Health", year="2025", month="Mar", day="20", volume="12", pages="e57986", keywords="anorexia nervosa", keywords="artificial intelligence", keywords="bulimia nervosa", keywords="ChatGPT", keywords="eating disorders", keywords="LLM", keywords="responsible AI", keywords="transformer", keywords="bias", keywords="large language model", keywords="gender", keywords="vignette", keywords="quality of life", keywords="symptomatology", keywords="questionnaire", keywords="generative AI", keywords="mental health", keywords="AI", abstract="Background: Large language models (LLMs) are increasingly used in mental health, showing promise in assessing disorders. However, concerns exist regarding their accuracy, reliability, and fairness. 
Societal biases and underrepresentation of certain populations may impact LLMs. Because LLMs are already used for clinical practice, including decision support, it is important to investigate potential biases to ensure a responsible use of LLMs. Anorexia nervosa (AN) and bulimia nervosa (BN) show a lifetime prevalence of 1\%--2\%, affecting more women than men. Among men, homosexual men face a higher risk of eating disorders (EDs) than heterosexual men. However, men are underrepresented in ED research, and studies on gender, sexual orientation, and their impact on AN and BN prevalence, symptoms, and treatment outcomes remain limited. Objectives: We aimed to estimate the presence and size of bias related to gender and sexual orientation produced by a common LLM as well as a smaller LLM specifically trained for mental health analyses, exemplified in the context of ED symptomatology and health-related quality of life (HRQoL) of patients with AN or BN. Methods: We extracted 30 case vignettes (22 AN and 8 BN) from scientific papers. We adapted each vignette to create 4 versions, describing a female versus male patient living with their female versus male partner (2 {\texttimes} 2 design), yielding 120 vignettes. We then fed each vignette into ChatGPT-4 and to ``MentaLLaMA'' based on the Large Language Model Meta AI (LLaMA) architecture thrice with the instruction to evaluate them by providing responses to 2 psychometric instruments, the RAND-36 questionnaire assessing HRQoL and the eating disorder examination questionnaire. With the resulting LLM-generated scores, we calculated multilevel models with a random intercept for gender and sexual orientation (accounting for within-vignette variance), nested in vignettes (accounting for between-vignette variance). 
Results: In ChatGPT-4, the multilevel model with 360 observations indicated a significant association with gender for the RAND-36 mental composite summary (conditional means: 12.8 for male and 15.1 for female cases; 95\% CI of the effect --6.15 to --0.35; P=.04) but neither with sexual orientation (P=.71) nor with an interaction effect (P=.37). We found no indications for main effects of gender (conditional means: 5.65 for male and 5.61 for female cases; 95\% CI --0.10 to 0.14; P=.88), sexual orientation (conditional means: 5.63 for heterosexual and 5.62 for homosexual cases; 95\% CI --0.14 to 0.09; P=.67), or for an interaction effect (P=.61, 95\% CI --0.11 to 0.19) for the eating disorder examination questionnaire overall score (conditional means 5.59--5.65 95\% CIs 5.45 to 5.7). MentaLLaMA did not yield reliable results. Conclusions: LLM-generated mental HRQoL estimates for AN and BN case vignettes may be biased by gender, with male cases scoring lower despite no real-world evidence supporting this pattern. This highlights the risk of bias in generative artificial intelligence in the field of mental health. Understanding and mitigating biases related to gender and other factors, such as ethnicity, and socioeconomic status are crucial for responsible use in diagnostics and treatment recommendations. ", doi="10.2196/57986", url="https://mental.jmir.org/2025/1/e57986" } @Article{info:doi/10.2196/63962, author="Paz-Arbaizar, Leire and Lopez-Castroman, Jorge and Art{\'e}s-Rodr{\'i}guez, Antonio and Olmos, M. 
Pablo and Ram{\'i}rez, David", title="Emotion Forecasting: A Transformer-Based Approach", journal="J Med Internet Res", year="2025", month="Mar", day="18", volume="27", pages="e63962", keywords="affect", keywords="emotional valence", keywords="machine learning", keywords="mental disorder", keywords="monitoring", keywords="mood", keywords="passive data", keywords="Patient Health Questionnaire-9", keywords="PHQ-9", keywords="psychological distress", keywords="time-series forecasting", abstract="Background: Monitoring the emotional states of patients with psychiatric problems has always been challenging due to the noncontinuous nature of clinical assessments, the effect of the health care environment, and the inherent subjectivity of evaluation instruments. However, mental states in psychiatric disorders exhibit substantial variability over time, making real-time monitoring crucial for preventing risky situations and ensuring appropriate treatment. Objective: This study aimed to leverage new technologies and deep learning techniques to enable more objective, real-time monitoring of patients. This was achieved by passively monitoring variables such as step count, patient location, and sleep patterns using mobile devices. We aimed to predict patient self-reports and detect sudden variations in their emotional valence, identifying situations that may require clinical intervention. Methods: Data for this project were collected using the Evidence-Based Behavior (eB2) app, which records both passive and self-reported variables daily. Passive data refer to behavioral information gathered via the eB2 app through sensors embedded in mobile devices and wearables. These data were obtained from studies conducted in collaboration with hospitals and clinics that used eB2. We used hidden Markov models (HMMs) to address missing data and transformer deep neural networks for time-series forecasting. 
Finally, classification algorithms were applied to predict several variables, including emotional state and responses to the Patient Health Questionnaire-9. Results: Through real-time patient monitoring, we demonstrated the ability to accurately predict patients' emotional states and anticipate changes over time. Specifically, our approach achieved high accuracy (0.93) and a receiver operating characteristic (ROC) area under the curve (AUC) of 0.98 for emotional valence classification. For predicting emotional state changes 1 day in advance, we obtained an ROC AUC of 0.87. Furthermore, we demonstrated the feasibility of forecasting responses to the Patient Health Questionnaire-9, with particularly strong performance for certain questions. For example, in question 9, related to suicidal ideation, our model achieved an accuracy of 0.9 and an ROC AUC of 0.77 for predicting the next day's response. Moreover, we illustrated the enhanced stability of multivariate time-series forecasting when HMM preprocessing was combined with a transformer model, as opposed to other time-series forecasting methods, such as recurrent neural networks or long short-term memory cells. Conclusions: The stability of multivariate time-series forecasting improved when HMM preprocessing was combined with a transformer model, as opposed to other time-series forecasting methods (eg, recurrent neural network and long short-term memory), leveraging the attention mechanisms to capture longer time dependencies and gain interpretability. We showed the potential to assess the emotional state of a patient and the scores of psychiatric questionnaires from passive variables in advance. This allows real-time monitoring of patients and hence better risk detection and treatment adjustment. 
", doi="10.2196/63962", url="https://www.jmir.org/2025/1/e63962" } @Article{info:doi/10.2196/60844, author="Gulec, Hayriye and Muzik, Michal and Smahel, David and Dedkova, Lenka", title="Longitudinal Associations Between Adolescents' mHealth App Use, Body Dissatisfaction, and Physical Self-Worth: Random Intercept Cross-Lagged Panel Study", journal="JMIR Ment Health", year="2025", month="Mar", day="11", volume="12", pages="e60844", keywords="mHealth app", keywords="body dissatisfaction", keywords="physical self-worth", keywords="random intercept cross-lagged panel model", keywords="RI-CLPM", keywords="longitudinal study", keywords="adolescent", abstract="Background: Longitudinal investigation of the association between mobile health (mHealth) app use and attitudes toward one's body during adolescence is scarce. mHealth apps might shape adolescents' body image perceptions by influencing their attitudes toward their bodies. Adolescents might also use mHealth apps based on how they feel and think about their bodies. Objective: This prospective study examined the longitudinal within-person associations between mHealth app use, body dissatisfaction, and physical self-worth during adolescence. Methods: The data were gathered from a nationally representative sample of Czech adolescents aged between 11 and 16 years (N=2500; n=1250, 50\% girls; mean age 13.43, SD 1.69 years) in 3 waves with 6-month intervals. Participants completed online questionnaires assessing their mHealth app use, physical self-worth, and body dissatisfaction at each wave. The mHealth app use was determined by the frequency of using sports, weight management, and nutritional intake apps. Physical self-worth was assessed using the physical self-worth subscale of the Physical Self Inventory-Short Form. Body dissatisfaction was measured with the items from the body dissatisfaction subscale of the Eating Disorder Inventory-3. 
The random intercept cross-lagged panel model examined longitudinal within-person associations between the variables. A multigroup design was used to compare genders. Due to the missing values, the final analyses used data from 2232 adolescents (n=1089, 48.8\% girls; mean age 13.43, SD 1.69 years). Results: The results revealed a positive within-person effect of mHealth app use on the physical self-worth of girls: increased mHealth app use predicted higher physical self-worth 6 months later ($\beta$=.199, P=.04). However, this effect was not consistent from the 6th to the 12th month: a within-person increase in using apps in the 6th month did not predict changes in girls' physical self-worth in the 12th month ($\beta$=.161, P=.07). Regardless of gender, the within-person changes in the frequency of using apps did not influence adolescents' body dissatisfaction. In addition, neither body dissatisfaction nor physical self-worth predicted app use frequency at the within-person level. Conclusions: This study highlighted that within-person changes in using mHealth apps were differentially associated with adolescents' body-related attitudes. While increased use of mHealth apps did not influence body dissatisfaction across genders, it significantly predicted higher physical self-worth in adolescent girls 6 months later. A similar association was not observed among boys after 6 months. These findings indicate that using mHealth apps is unlikely to have a detrimental impact on adolescents' body dissatisfaction and physical self-worth; instead, they may have a positive influence, particularly in boosting the physical self-worth of adolescent girls. ", doi="10.2196/60844", url="https://mental.jmir.org/2025/1/e60844" } @Article{info:doi/10.2196/64352, author="Gomes da Rocha, Carla and von Gunten, Armin and Vandel, Pierre and Jopp, S. 
Daniela and Ribeiro, Olga and Verloo, Henk", title="Building Consensus on the Relevant Criteria to Screen for Depressive Symptoms Among Near-Centenarians and Centenarians: Modified e-Delphi Study", journal="JMIR Aging", year="2025", month="Mar", day="5", volume="8", pages="e64352", keywords="centenarians", keywords="near-centenarians", keywords="depressive symptoms", keywords="depression diagnosis", keywords="screening", keywords="assessment", keywords="e-Delphi technique", keywords="web-based survey", abstract="Background: The number of centenarians worldwide is expected to increase dramatically, reaching 3.4 million by 2050 and >25 million by 2100. Despite these projections, depression remains a prevalent yet underdiagnosed and undertreated condition among this population that carries significant health risks. Objective: This study aimed to identify and achieve consensus on the most representative signs and symptoms of depression in near-centenarians and centenarians (aged $\geq$95 years) through an e-Delphi study with an international and interdisciplinary panel of experts. Ultimately, the outcomes of this study might help create a screening instrument that is specifically designed for this unique population. Methods: A modified e-Delphi study was carried out to achieve expert consensus on depressive symptoms in near-centenarians and centenarians. A panel of 28 international experts was recruited. Consensus was defined as 70\% agreement on the relevance of each item. Data were collected through a web-based questionnaire over 3 rounds. Experts rated 104 items that were divided into 24 dimensions and 80 criteria to identify the most representative signs and symptoms of depression in this age group. Results: The panel consisted of experts from various countries, including physicians with experience in old age psychiatry or geriatrics as well as nurses and psychologists. The response rate remained consistent over the rounds (20/28, 71\% to 21/28, 75\%). 
In total, 4 new dimensions and 8 new criteria were proposed by the experts, and consensus was reached on 86\% (24/28) of the dimensions and 80\% (70/88) of the criteria. The most consensual potentially relevant dimensions were lack of hope (21/21, 100\%), loss of interest (27/28, 96\%), lack of reactivity to pleasant events (27/28, 96\%), depressed mood (26/28, 93\%), and previous episodes of depression or diagnosed depression (19/21, 90\%). In addition, the most consensual potentially relevant criteria were despondency, gloom, and despair (25/25, 100\%); depressed (27/27, 100\%); lack of reactivity to pleasant events or circumstances (28/28, 100\%); suicidal ideation (28/28, 100\%); suicide attempt(s) (28/28, 100\%); ruminations (27/28, 96\%); recurrent thoughts of death or suicide (27/28, 96\%); feelings of worthlessness (25/26, 96\%); critical life events (20/21, 95\%); anhedonia (20/21, 95\%); loss of interest in activities (26/28, 93\%); loss of pleasure in activities (26/28, 93\%); and sadness (24/26, 92\%). Moreover, when assessing depression in very old age, the duration, number, frequency, and severity of signs and symptoms should also be considered, as evidenced by the high expert agreement. Conclusions: The classification of most elements as relevant highlights the importance of a multidimensional approach for optimal depression screening among individuals of very old age. This study offers a first step toward improving depression assessment in near-centenarians and centenarians. The development of a more adapted screening tool could improve early detection and intervention, enhancing the quality of mental health care for this population. 
", doi="10.2196/64352", url="https://aging.jmir.org/2025/1/e64352", url="http://www.ncbi.nlm.nih.gov/pubmed/40053803" } @Article{info:doi/10.2196/65292, author="Huynh, Duong and Sun, Kevin and Patterson, Mary and Hosseini Ghomi, Reza and Huang, Bin", title="Performance of a Digital Cognitive Assessment in Predicting Dementia Stages Delineated by the Dementia Severity Rating Scale: Retrospective Study", journal="JMIR Aging", year="2025", month="Feb", day="26", volume="8", pages="e65292", keywords="stage", keywords="severity", keywords="progression", keywords="correlation", keywords="association", keywords="cognitive impairment", keywords="functional activities", keywords="cognitive assessment", keywords="BrainCheck", keywords="dementia", keywords="Alzheimer disease", keywords="gerontology", keywords="geriatric", keywords="old", keywords="elderly", keywords="aging", keywords="retrospective analysis", keywords="digital assessment", keywords="patient assessment", keywords="digital cognitive assessment", keywords="digital health", keywords="neurodegeneration", keywords="memory loss", keywords="memory function", keywords="risk factors", abstract="Background: Dementia is characterized by impairments in an individual's cognitive and functional abilities. Digital cognitive assessments have been shown to be effective in detecting mild cognitive impairment and dementia, but whether they can stage the disease remains to be studied. Objective: In this study, we examined (1) the correlation between scores obtained from BrainCheck standard battery of cognitive assessments (BC-Assess), a digital cognitive assessment, and scores obtained from the Dementia Severity Rating Scale (DSRS), and (2) the accuracy of using the BC-Assess score to predict dementia stage delineated by the DSRS score. We also explored whether BC-Assess can be combined with information from the Katz Index of Independence in activities of daily living (ADL) to obtain enhanced accuracy. 
Methods: Retrospective analysis was performed on a BrainCheck dataset containing 1751 patients with dementia with different cognitive and functional assessments completed for cognitive care planning, including the DSRS, the ADL, and the BC-Assess. The patients were staged according to their DSRS total score (DSRS-TS): 982 mild (DSRS-TS 10--18), 656 moderate (DSRS-TS 19-26), and 113 severe (DSRS-TS 37-54) patients. Pearson correlation was used to assess the associations between BC-Assess overall score (BC-OS), ADL total score (ADL-TS), and DSRS-TS. Logistic regression was used to evaluate the possibility of using patients' BC-OS and ADL-TS to predict their stage. Results: We found moderate Pearson correlations between DSRS-TS and BC-OS (r=--0.53), between DSRS-TS and ADL-TS (r=--0.55), and a weak correlation between BC-OS and ADL-TS (r=0.37). Both BC-OS and ADL-TS significantly decreased with increasing severity. BC-OS demonstrated to be a good predictor of dementia stages, with an area under the receiver operating characteristic curve (ROC-AUC) of classification using logistic regression ranging from .733 to .917. When BC-Assess was combined with ADL, higher prediction accuracies were achieved, with an ROC-AUC ranging from 0.786 to 0.961. Conclusions: Our results suggest that BC-Assess could serve as an effective alternative tool to DSRS for grading dementia severity, particularly in cases where DSRS, or other global assessments, may be challenging to obtain due to logistical and time constraints. 
", doi="10.2196/65292", url="https://aging.jmir.org/2025/1/e65292" } @Article{info:doi/10.2196/68347, author="Hadar-Shoval, Dorit and Lvovsky, Maya and Asraf, Kfir and Shimoni, Yoav and Elyoseph, Zohar", title="The Feasibility of Large Language Models in Verbal Comprehension Assessment: Mixed Methods Feasibility Study", journal="JMIR Form Res", year="2025", month="Feb", day="24", volume="9", pages="e68347", keywords="large language models", keywords="verbal comprehension assessment", keywords="artificial intelligence", keywords="AI in psychodiagnostics", keywords="personalized intelligence tests", keywords="verbal comprehension index", keywords="Wechsler Adult Intelligence Scale", keywords="WAIS-III", keywords="psychological test validity", keywords="ethics in computerized cognitive assessment", abstract="Background: Cognitive assessment is an important component of applied psychology, but limited access and high costs make these evaluations challenging. Objective: This study aimed to examine the feasibility of using large language models (LLMs) to create personalized artificial intelligence--based verbal comprehension tests (AI-BVCTs) for assessing verbal intelligence, in contrast with traditional assessment methods based on standardized norms. Methods: We used a within-participants design, comparing scores obtained from AI-BVCTs with those from the Wechsler Adult Intelligence Scale (WAIS-III) verbal comprehension index (VCI). In total, 8 Hebrew-speaking participants completed both the VCI and AI-BVCT, the latter being generated using the LLM Claude. Results: The concordance correlation coefficient (CCC) demonstrated strong agreement between AI-BVCT and VCI scores (Claude: CCC=.75, 90\% CI 0.266-0.933; GPT-4: CCC=.73, 90\% CI 0.170-0.935). Pearson correlations further supported these findings, showing strong associations between VCI and AI-BVCT scores (Claude: r=.84, P<.001; GPT-4: r=.77, P=.02). 
No statistically significant differences were found between AI-BVCT and VCI scores (P>.05). Conclusions: These findings support the potential of LLMs to assess verbal intelligence. The study attests to the promise of AI-based cognitive tests in increasing the accessibility and affordability of assessment processes, enabling personalized testing. The research also raises ethical concerns regarding privacy and overreliance on AI in clinical work. Further research with larger and more diverse samples is needed to establish the validity and reliability of this approach and develop more accurate scoring procedures. ", doi="10.2196/68347", url="https://formative.jmir.org/2025/1/e68347" } @Article{info:doi/10.2196/64716, author="Harvey, Philip and Curiel-Cid, Rosie and Kallestrup, Peter and Mueller, Annalee and Rivera-Molina, Andrea and Czaja, Sara and Crocco, Elizabeth and Loewenstein, David", title="Digital Migration of the Loewenstein Acevedo Scales for Semantic Interference and Learning (LASSI-L): Development and Validation Study in Older Participants", journal="JMIR Ment Health", year="2025", month="Feb", day="19", volume="12", pages="e64716", keywords="mild cognitive impairment", keywords="cognitive challenge tests", keywords="elder", keywords="aging", keywords="amyloid biomarkers", keywords="cognition", keywords="cognitive decline", keywords="deterioration", keywords="semantic interference", keywords="Alzheimer disease", keywords="self-administered", keywords="voice recognition", keywords="technology", keywords="assessment study", keywords="accuracy", keywords="artificial intelligence", keywords="treatment", keywords="medication", keywords="mental health", keywords="biomarkers", keywords="amnesia", keywords="neurodegeneration", keywords="patient health", keywords="health monitoring", keywords="digital mental health", keywords="neuroscience", keywords="neurotechnology", keywords="Loewenstein Acevedo Scales for Semantic Interference and Learning", keywords="LASSI-L", 
keywords="digital Loewenstein-Acevedo Scales for Semantic Interference", keywords="LASSI-D", abstract="Background: The early detection of mild cognitive impairment is crucial for providing treatment before further decline. Cognitive challenge tests such as the Loewenstein-Acevedo Scales for Semantic Interference and Learning (LASSI-L) can identify individuals at highest risk for cognitive deterioration. Performance on elements of the LASSI-L, particularly proactive interference, correlate with the presence of critical Alzheimer disease biomarkers. However, in-person paper tests require skilled testers and are not practical in many community settings or for large-scale screening in prevention. Objective: This study reports on the development and initial validation of a self-administered computerized version of the Loewenstein-Acevedo Scales for Semantic Interference (LASSI), the digital LASSI (LASSI-D). A self-administered digital version, with an artificial intelligence--generated avatar assistant, was the migrated assessment. Methods: Cloud-based software was developed, using voice recognition technology, for English and Spanish versions of the LASSI-D. Participants were assessed with either the LASSI-L or LASSI-D first, in a sequential assessment study. Participants with amnestic mild cognitive impairment (aMCI; n=54) or normal cognition (NC; n=58) were also tested with traditional measures such as the Alzheimer Disease Assessment Scale-Cognition. We examined group differences in performance across the legacy and digital versions of the LASSI, as well as correlations between LASSI performance and other measures across the versions. Results: Differences on recall and intrusion variables between aMCI and NC samples on both versions were all statistically significant (all P<.001), with at least medium effect sizes (d>0.68). 
There were no statistically significant performance differences in these variables between legacy and digital administration in either sample (all P<.13). There were no language differences in any variables (P>.10), and correlations between LASSI variables and other cognitive variables were statistically significant (all P<.01). The most predictive legacy variables, proactive interference and failure to recover from proactive interference, were identical across legacy and migrated versions within groups and were identical to results of previous studies with the legacy LASSI-L. Classification accuracy was 88\% for NC and 78\% for aMCI participants. Conclusions: The results for the digital migration of the LASSI-D were highly convergent with the legacy LASSI-L. Across all indices of similarity, including sensitivity, criterion validity, classification accuracy, and performance, the versions converged across languages. Future studies will present additional validation data, including correlations with blood-based Alzheimer disease biomarkers and alternative forms. The current data provide convincing evidence of the use of a fully self-administered digitally migrated cognitive challenge test. 
", doi="10.2196/64716", url="https://mental.jmir.org/2025/1/e64716" } @Article{info:doi/10.2196/66838, author="Shin, Hyeonsang and Seong, Woohyun and Woo, Yeonju and Kim, Joo-Hee and Park, Kwang-Rak and Lee, Hyuk Dong", title="Neural Mechanism of Cognitive Reserve in Acupuncture Stimulation: Protocol for a Randomized, Placebo-Controlled Functional Near-Infrared Spectroscopy Trial", journal="JMIR Res Protoc", year="2025", month="Feb", day="19", volume="14", pages="e66838", keywords="cognitive reserve", keywords="acupuncture", keywords="dementia", keywords="mild cognitive impairment", keywords="neuroimaging", keywords="fNIRS", keywords="brain connectivity", keywords="neural mechanism", keywords="RCT", keywords="randomized controlled trial", abstract="Background: Dementia is a clinical syndrome characterized by a progressive decline in various cognitive domains. Since there is still no treatment for dementia, early diagnosis and prevention are the best approaches. In this context, the cognitive reserve (CR) concept has received considerable attention in dementia research with regard to prognosis. It originates from discrepancies between the degree of brain pathology and clinical manifestations. Acupuncture, as a complementary intervention, has long been widely applied in neurological diseases in East Asia. At the macroscale level, how acupuncture stimulation affects neural activity concerning CR in normal aging and dementia is largely unknown. Objective: The aim of this study is to investigate the acute neural mechanisms of acupuncture stimulation concerning CR in the normal aging group and the group with cognitive impairment using neuroimaging methods. Methods: This study is a randomized, placebo-controlled trial. Participants without (n=30) and with cognitive impairment (n=30) will be randomly assigned to the verum or sham acupuncture groups. 
The verum acupuncture group will receive acupuncture stimulation at acupoints related to cognitive function and gain deqi sensation. The sham acupuncture group will receive superficial needling at nonacupoints not related to cognitive function. Each group will undergo cognitive function tests, functional near-infrared spectroscopy imaging before and after acupuncture stimulation, and an assessment of CR. The primary outcomes will be differences in resting brain activities according to disease status, differences in resting brain connectivity before and after acupuncture stimulation between the 2 groups, and changes in brain activity in relation to the CR index. The secondary outcomes will be brain connectivity or network metrics associated with CR and differences in neural activity between the cognitive task and resting states. Results: The recruitment began in August 2023; to date, there have been 50 participants, divided into 20 in the group with cognitive impairment and 30 in the unimpaired group. The recruitment process will continue until February 2025. Conclusions: CR refers to the individual susceptibility to age-related brain changes and pathologies in cognitive impairment, and it is a factor affecting the trajectories of the disease. Although acupuncture is a widely used intervention for various neurological diseases, including dementia, its mechanism associated with CR at the macroscale has not been clearly identified. This study could contribute to identifying the neural mechanisms of acupuncture stimulation associated with CR using neuroimaging methods and provide a basis for future longitudinal research. 
Trial Registration: Clinical Research Information Service of the Republic of Korea KCT0008719; https://tinyurl.com/ydv5537n International Registered Report Identifier (IRRID): DERR1-10.2196/66838 ", doi="10.2196/66838", url="https://www.researchprotocols.org/2025/1/e66838" }

@Article{info:doi/10.2196/60754,
  author="Loch, Andrade Alexandre and Kotov, Roman",
  title="Promises and Pitfalls of Internet Search Data in Mental Health: Critical Review",
  journal="JMIR Ment Health",
  year="2025",
  month=feb,
  day="18",
  volume="12",
  pages="e60754",
  keywords="privacy, stigma, online, prevention, internet, search data, mental health, health care, clinical information, World Health Organization, WHO, digital health, mental illness, digital technologies, social network, mobile health, mHealth",
  doi="10.2196/60754",
  url="https://mental.jmir.org/2025/1/e60754"
}

@Article{info:doi/10.2196/59015,
  author="Shimada, Hiroyuki and Doi, Takehiko and Tsutsumimoto, Kota and Makino, Keitaro and Harada, Kenji and Tomida, Kouki and Morikawa, Masanori and Makizako, Hyuma",
  title="A New Computer-Based Cognitive Measure for Early Detection of Dementia Risk ({Japan Cognitive Function Test}): Validation Study",
  journal="J Med Internet Res",
  year="2025",
  month=feb,
  day="14",
  volume="27",
  pages="e59015",
  keywords="cognition, neurocognitive test, dementia, Alzheimer disease, aged, MMSE, cognitive impairment, Mini-Mental State Examination, monitoring, eHealth",
  abstract="Background: The emergence of disease-modifying treatment options for Alzheimer disease is creating a paradigm shift in strategies to identify patients with mild symptoms in primary care settings. 
Systematic reviews on digital cognitive tests reported that most showed diagnostic performance comparable with that of paper-and-pencil tests for mild cognitive impairment and dementia. However, most studies have small sample sizes, with fewer than 100 individuals, and are based on case-control or cross-sectional designs. Objective: This study aimed to examine the predictive validity of the Japanese Cognitive Function Test (J-Cog), a new computerized cognitive battery test, for dementia development. Methods: We randomly assigned 2520 older adults (average age 72.7, SD 6.7 years) to derivation and validation groups to determine and validate cutoff points for the onset of dementia. The Mini-Mental State Examination (MMSE) was used for comparison purposes. The J-Cog consists of 12 tasks that assess orientation, designation, attention and calculation, mental rotation, verbal fluency, sentence completion, working memory, logical reasoning, attention, common knowledge, word memory recall, and episodic memory recall. The onset of dementia was monitored for 60 months. In the derivation group, receiver operating characteristic curves were plotted to determine the MMSE and J-Cog cutoff points that best discriminated between the groups with and without dementia. In the validation group, Cox proportional regression models were developed to predict the associations of the group classified using the cutoff points of the J-Cog or MMSE with dementia incidence. Harrell C-statistic was estimated to summarize how well a predicted risk score described an observed sequence of events. The Akaike information criterion was calculated for relative goodness of fit, where lower absolute values indicate a better model fit. Results: Significant hazard ratios (HRs) for dementia incidence were found using the MMSE cutoff between 23 and 24 points (HR 1.93, 95\% CI 1.13-3.27) and the J-Cog cutoff between 43 and 44 points (HR 2.42, 95\% CI 1.50-3.93). 
In the total validation group, the C-statistic was above 0.8 for all cutoff points. Akaike information criterion with MMSE cutoff between 23 and 24 points as a reference showed a poor fit for MMSE cutoff between 28 and 29 points, and a good fit for the J-Cog cutoff between 43 and 44 points. Conclusions: The J-Cog has higher accuracy in predicting the development of dementia than the MMSE and has advantages for use in the community as a test of cognitive function, which can be administered by nonprofessionals. ",
  doi="10.2196/59015",
  url="https://www.jmir.org/2025/1/e59015"
}

@Article{info:doi/10.2196/66665,
  author="Mardini, T. Mamoun and Khalil, E. Georges and Bai, Chen and DivaKaran, Menon Aparna and Ray, M. Jessica",
  title="Identifying Adolescent Depression and Anxiety Through Real-World Data and Social Determinants of Health: Machine Learning Model Development and Validation",
  journal="JMIR Ment Health",
  year="2025",
  month=feb,
  day="12",
  volume="12",
  pages="e66665",
  keywords="social determinants of health, adolescents, anxiety, depression, machine learning, real-world data, teenagers, youth, XGBoost, cross-validation technique, SHapley Additive exPlanation, mental health, mental disorder, mental illness, health outcomes, clinical data",
  abstract="Background: The prevalence of adolescent mental health conditions such as depression and anxiety has significantly increased. Despite the potential of machine learning (ML), there is a shortage of models that use real-world data (RWD) to enhance early detection and intervention for these conditions. Objective: This study aimed to identify depression and anxiety in adolescents using ML techniques on RWD and social determinants of health (SDoH). 
Methods: We analyzed RWD of adolescents aged 10--17 years, considering various factors such as demographics, prior diagnoses, prescribed medications, medical procedures, and laboratory measurements recorded before the onset of anxiety or depression. Clinical data were linked with SDoH at the block-level. Three separate models were developed to predict anxiety, depression, and both conditions. Our ML model of choice was Extreme Gradient Boosting (XGBoost) and we evaluated its performance using the nested cross-validation technique. To interpret the model predictions, we used the Shapley additive explanation method. Results: Our cohort included 52,054 adolescents, identifying 12,572 with anxiety, 7812 with depression, and 14,019 with either condition. The models achieved area under the curve values of 0.80 for anxiety, 0.81 for depression, and 0.78 for both combined. Excluding SDoH data had a minimal impact on model performance. Shapley additive explanation analysis identified gender, race, educational attainment, and various medical factors as key predictors of anxiety and depression. Conclusions: This study highlights the potential of ML in early identification of depression and anxiety in adolescents using RWD. By leveraging RWD, health care providers may more precisely identify at-risk adolescents and intervene earlier, potentially leading to improved mental health outcomes. ", doi="10.2196/66665", url="https://mental.jmir.org/2025/1/e66665" }

@Article{info:doi/10.2196/63149,
  author="Downing, J. Gregory and Tramontozzi, M. Lucas and Garcia, Jackson and Villanueva, Emma",
  title="Harnessing Internet Search Data as a Potential Tool for Medical Diagnosis: Literature Review",
  journal="JMIR Ment Health",
  year="2025",
  month=feb,
  day="11",
  volume="12",
  pages="e63149",
  keywords="health, informatics, internet search data, early diagnosis, web search, information technology, internet, machine learning, medical records, diagnosis, health care, self-diagnosis, detection, intervention, patient education, internet search, health-seeking behavior, artificial intelligence, AI",
  abstract="Background: The integration of information technology into health care has created opportunities to address diagnostic challenges. Internet searches, representing a vast source of health-related data, hold promise for improving early disease detection. Studies suggest that patterns in search behavior can reveal symptoms before clinical diagnosis, offering potential for innovative diagnostic tools. Leveraging advancements in machine learning, researchers have explored linking search data with health records to enhance screening and outcomes. However, challenges like privacy, bias, and scalability remain critical to its widespread adoption. Objective: We aimed to explore the potential and challenges of using internet search data in medical diagnosis, with a specific focus on diseases and conditions such as cancer, cardiovascular disease, mental and behavioral health, neurodegenerative disorders, and nutritional and metabolic diseases. We examined ethical, technical, and policy considerations while assessing the current state of research, identifying gaps and limitations, and proposing future research directions to advance this emerging field. 
Methods: We conducted a comprehensive analysis of peer-reviewed literature and informational interviews with subject matter experts to examine the landscape of internet search data use in medical research. We searched for published peer-reviewed literature on the PubMed database between October and December 2023. Results: Systematic selection based on predefined criteria included 40 articles from the 2499 identified articles. The analysis revealed a nascent domain of internet search data research in medical diagnosis, marked by advancements in analytics and data integration. Despite challenges such as bias, privacy, and infrastructure limitations, emerging initiatives could reshape data collection and privacy safeguards. Conclusions: We identified signals correlating with diagnostic considerations in certain diseases and conditions, indicating the potential for such data to enhance clinical diagnostic capabilities. However, leveraging internet search data for improved early diagnosis and health care outcomes requires effectively addressing ethical, technical, and policy challenges. By fostering interdisciplinary collaboration, advancing infrastructure development, and prioritizing patient engagement and consent, researchers can unlock the transformative potential of internet search data in medical diagnosis to ultimately enhance patient care and advance health care practice and policy. 
",
  doi="10.2196/63149",
  url="https://mental.jmir.org/2025/1/e63149",
  pmid="39813106"
}

@Article{info:doi/10.2196/66104,
  author="Bai, Anying and He, Shan and Jiang, Yu and Xu, Weihao and Lin, Zhanyi",
  title="Comparison of 3 Aging Metrics in Dual Declines to Capture All-Cause Dementia and Mortality Risk: Cohort Study",
  journal="JMIR Aging",
  year="2025",
  month=jan,
  day="30",
  volume="8",
  pages="e66104",
  keywords="gerontology, geriatrics, older adults, older people, aging, motoric cognitive risk syndrome, MCR, physio-cognitive decline syndrome, PCDS, cognitive frailty, CF, frailty, discrimination, risk factors, prediction, dementia risk, mortality risk",
  abstract="Background: The utility of aging metrics that incorporate cognitive and physical function is not fully understood. Objective: We aim to compare the predictive capacities of 3 distinct aging metrics---motoric cognitive risk syndrome (MCR), physio-cognitive decline syndrome (PCDS), and cognitive frailty (CF)---for incident dementia and all-cause mortality among community-dwelling older adults. Methods: We used longitudinal data from waves 10-15 of the Health and Retirement Study. Cox proportional hazards regression analysis was employed to evaluate the effects of MCR, PCDS, and CF on incident all-cause dementia and mortality, controlling for socioeconomic and lifestyle factors, as well as medical comorbidities. Discrimination analysis was conducted to assess and compare the predictive accuracy of the 3 aging metrics. Results: A total of 2367 older individuals aged 65 years and older, with no baseline prevalence of dementia or disability, were ultimately included. The prevalence rates of MCR, PCDS, and CF were 5.4\%, 6.3\%, and 1.3\%, respectively. 
Over a decade-long follow-up period, 341 cases of dementia and 573 deaths were recorded. All 3 metrics were predictive of incident all-cause dementia and mortality when adjusting for multiple confounders, with variations in the strength of their associations (incident dementia: MCR odds ratio [OR] 1.90, 95\% CI 1.30--2.78; CF 5.06, 95\% CI 2.87--8.92; PCDS 3.35, 95\% CI 2.44--4.58; mortality: MCR 1.60, 95\% CI 1.17--2.19; CF 3.26, 95\% CI 1.99--5.33; and PCDS 1.58, 95\% CI 1.17--2.13). The C-index indicated that PCDS and MCR had the highest discriminatory accuracy for all-cause dementia and mortality, respectively. Conclusions: Despite the inherent differences among the aging metrics that integrate cognitive and physical functions, they consistently identified risks of dementia and mortality. This underscores the importance of implementing targeted preventive strategies and intervention programs based on these metrics to enhance the overall quality of life and reduce premature deaths in aging populations. ", doi="10.2196/66104", url="https://aging.jmir.org/2025/1/e66104" }

@Article{info:doi/10.2196/55308,
  author="Terhorst, Yannik and Messner, Eva-Maria and Opoku Asare, Kennedy and Montag, Christian and Kannen, Christopher and Baumeister, Harald",
  title="Investigating Smartphone-Based Sensing Features for Depression Severity Prediction: Observation Study",
  journal="J Med Internet Res",
  year="2025",
  month=jan,
  day="30",
  volume="27",
  pages="e55308",
  keywords="smart sensing, digital phenotyping, depression, observation study, smartphone, mHealth, mobile health, app, mental health, symptoms, assessments",
  abstract="Background: Unobtrusively collected objective sensor data from everyday devices like smartphones provide a novel paradigm to infer mental health symptoms. 
This process, called smart sensing, allows a fine-grained assessment of various features (eg, time spent at home based on the GPS sensor). Based on its prevalence and impact, depression is a promising target for smart sensing. However, currently, it is unclear which sensor-based features should be used in depression severity prediction and if they hold an incremental benefit over established fine-grained assessments like the ecological momentary assessment (EMA). Objective: The aim of this study was to investigate various features based on the smartphone screen, app usage, and call sensor alongside EMA to infer depression severity. Bivariate, cluster-wise, and cluster-combined analyses were conducted to determine the incremental benefit of smart sensing features compared to each other and EMA in parsimonious regression models for depression severity. Methods: In this exploratory observational study, participants were recruited from the general population. Participants needed to be 18 years of age, provide written informed consent, and own an Android-based smartphone. Sensor data and EMA were collected via the INSIGHTS app. Depression severity was assessed using the 8-item Patient Health Questionnaire. Missing data were handled by multiple imputations. Correlation analyses were conducted for bivariate associations; stepwise linear regression analyses were used to find the best prediction models for depression severity. Models were compared by adjusted $R^2$. All analyses were pooled across the imputed datasets according to Rubin's rule. Results: A total of 107 participants were included in the study. Ages ranged from 18 to 56 (mean 22.81, SD 7.32) years, and 78\% of the participants identified as female. Depression severity was subclinical on average (mean 5.82, SD 4.44; Patient Health Questionnaire score $\geq$10: 18.7\%). 
Small to medium correlations were found for depression severity and EMA (eg, valence: r=$-$0.55, 95\% CI $-$0.67 to $-$0.41), and there were small correlations with sensing features (eg, screen duration: r=0.37, 95\% CI 0.20 to 0.53). EMA features could explain 35.28\% (95\% CI 20.73\% to 49.64\%) of variance and sensing features (adjusted $R^2$=20.45\%, 95\% CI 7.81\% to 35.59\%). The best regression model contained EMA and sensing features ($R^2$=45.15\%, 95\% CI 30.39\% to 58.53\%). Conclusions: Our findings underline the potential of smart sensing and EMA to infer depression severity as isolated paradigms and when combined. Although these could become important parts of clinical decision support systems for depression diagnostics and treatment in the future, confirmatory studies are needed before they can be applied to routine care. Furthermore, privacy, ethical, and acceptance issues need to be addressed. ",
  doi="10.2196/55308",
  url="https://www.jmir.org/2025/1/e55308",
  pmid="39883512"
}

@Article{info:doi/10.2196/54531,
  author="Sankesara, Heet and Denyer, Hayley and Sun, Shaoxiong and Deng, Qigang and Ranjan, Yatharth and Conde, Pauline and Rashid, Zulqarnain and Asherson, Philip and Bilbow, Andrea and Groom, J. Madeleine and Hollis, Chris and Dobson, B. Richard J. and Folarin, Amos and Kuntsi, Jonna",
  title="Identifying Digital Markers of Attention-Deficit/Hyperactivity Disorder ({ADHD}) in a Remote Monitoring Setting: Prospective Observational Study",
  journal="JMIR Form Res",
  year="2025",
  month=jan,
  day="29",
  volume="9",
  pages="e54531",
  keywords="ADHD, smartphones, wearable devices, mobile health, mHealth, remote monitoring, surveillance, digital markers, attention-deficit/hyperactivity disorder, behavioral data, real world, adult, adolescent, participants, digital signals, restlessness, severity, predicting outcomes",
  abstract="Background: The symptoms and associated characteristics of attention-deficit/hyperactivity disorder (ADHD) are typically assessed in person at a clinic or in a research lab. Mobile health offers a new approach to obtaining additional passively and continuously measured real-world behavioral data. Using our new ADHD remote technology (ART) system, based on the Remote Assessment of Disease and Relapses (RADAR)--base platform, we explore novel digital markers for their potential to identify behavioral patterns associated with ADHD. The RADAR-base Passive App and wearable device collect sensor data in the background, while the Active App involves participants completing clinical symptom questionnaires. Objective: The main aim of this study was to investigate whether adults and adolescents with ADHD differ from individuals without ADHD on 10 digital signals that we hypothesize capture lapses in attention, restlessness, or impulsive behaviors. Methods: We collected data over 10 weeks from 20 individuals with ADHD and 20 comparison participants without ADHD between the ages of 16 and 39 years. 
We focus on features derived from (1) Active App (mean and SD of questionnaire notification response latency and of the time interval between questionnaires), (2) Passive App (daily mean and SD of response time to social and communication app notifications, the SD in ambient light during phone use, total phone use time, and total number of new apps added), and (3) a wearable device (Fitbit) (daily steps taken while active on the phone). Linear mixed models and t tests were employed to assess the group differences for repeatedly measured and time-aggregated variables, respectively. Effect sizes (d) convey the magnitude of differences. Results: Group differences were significant for 5 of the 10 variables. The participants with ADHD were (1) slower (P=.047, d=1.05) and more variable (P=.01, d=0.84) in their speed of responding to the notifications to complete the questionnaires, (2) had a higher SD in the time interval between questionnaires (P=.04, d=1.13), (3) had higher daily mean response time to social and communication app notifications (P=.03, d=0.7), and (4) had a greater change in ambient (background) light when they were actively using the smartphone (P=.008, d=0.86). Moderate to high effect sizes with nonsignificant P values were additionally observed for the mean of time intervals between questionnaires (P=.06, d=0.82), daily SD in responding to social and communication app notifications (P=.05, d=0.64), and steps taken while active on the phone (P=.09, d=0.61). The groups did not differ in the total phone use time (P=.11, d=0.54) and the number of new apps downloaded (P=.24, d=0.18). Conclusions: In a novel exploration of digital markers of ADHD, we identified candidate digital signals of restlessness, inconsistent attention, and difficulties completing tasks. Larger future studies are needed to replicate these findings and to assess the potential of such objective digital signals for tracking ADHD severity or predicting outcomes. 
",
  doi="10.2196/54531",
  url="https://formative.jmir.org/2025/1/e54531"
}

@Article{info:doi/10.2196/66330,
  author="Choomung, Pichsinee and He, Yupeng and Matsunaga, Masaaki and Sakuma, Kenji and Kishi, Taro and Li, Yuanying and Tanihara, Shinichi and Iwata, Nakao and Ota, Atsuhiko",
  title="Estimating the Prevalence of Schizophrenia in the General Population of {Japan} Using an Artificial Neural Network--Based Schizophrenia Classifier: Web-Based Cross-Sectional Survey",
  journal="JMIR Form Res",
  year="2025",
  month=jan,
  day="29",
  volume="9",
  pages="e66330",
  keywords="schizophrenia, schizophrenic, prevalence, artificial neural network, neural network, neural networks, ANN, deep learning, machine learning, SZ classifier, web-based survey, epidemiology, epidemiological, Japan, classifiers, mental illness, mental disorder, mental health",
  abstract="Background: Estimating the prevalence of schizophrenia in the general population remains a challenge worldwide, as well as in Japan. Few studies have estimated schizophrenia prevalence in the Japanese population and have often relied on reports from hospitals and self-reported physician diagnoses or typical schizophrenia symptoms. These approaches are likely to underestimate the true prevalence owing to stigma, poor insight, or lack of access to health care among respondents. To address these issues, we previously developed an artificial neural network (ANN)--based schizophrenia classification model (SZ classifier) using data from a large-scale Japanese web-based survey to enhance the comprehensiveness of schizophrenia case identification in the general population. 
In addition, we also plan to introduce a population-based survey to collect general information and sample participants matching the population's demographic structure, thereby achieving a precise estimate of the prevalence of schizophrenia in Japan. Objective: This study aimed to estimate the prevalence of schizophrenia by applying the SZ classifier to random samples from the Japanese population. Methods: We randomly selected a sample of 750 participants where the age, sex, and regional distributions were similar to Japan's demographic structure from a large-scale Japanese web-based survey. Demographic data, health-related backgrounds, physical comorbidities, psychiatric comorbidities, and social comorbidities were collected and applied to the SZ classifier, as this information was also used for developing the SZ classifier. The crude prevalence of schizophrenia was calculated through the proportion of positive cases detected by the SZ classifier. The crude estimate was further refined by excluding false-positive cases and including false-negative cases to determine the actual prevalence of schizophrenia. Results: Out of 750 participants, 62 were classified as schizophrenia cases by the SZ classifier, resulting in a crude prevalence of schizophrenia in the general population of Japan of 8.3\% (95\% CI 6.6\%-10.1\%). Among these 62 cases, 53 were presumed to be false positives, and 3 were presumed to be false negatives. After adjustment, the actual prevalence of schizophrenia in the general population was estimated to be 1.6\% (95\% CI 0.7\%-2.5\%). Conclusions: This estimated prevalence was slightly higher than that reported in previous studies, possibly due to a more comprehensive disease classification methodology or, conversely, model limitations. This study demonstrates the capability of an ANN-based model to improve the estimation of schizophrenia prevalence in the general population, offering a novel approach to public health analysis. 
", doi="10.2196/66330", url="https://formative.jmir.org/2025/1/e66330" }

@Article{info:doi/10.2196/65658,
  author="Ennis, Edel and Bond, Raymond and Mulvenna, Maurice and Sweeney, Colm",
  title="Understanding Individual Differences in Happiness Sources and Implications for Health Technology Design: Exploratory Analysis of an Open Dataset",
  journal="JMIR Form Res",
  year="2025",
  month=jan,
  day="29",
  volume="9",
  pages="e65658",
  keywords="happiness, sexes, age, marital status, parents, affections, achievements, datasets, digital health, well-being, mental health, digital mental health interventions, regression analyses, evidence based",
  abstract="Background: Psychologists have developed frameworks to understand many constructs, which have subsequently informed the design of digital mental health interventions (DMHIs) aimed at improving mental health outcomes. The science of happiness is one such domain that holds significant applied importance due to its links to well-being and evidence that happiness can be cultivated through interventions. However, as with many constructs, the unique ways in which individuals experience happiness present major challenges for designing personalized DMHIs. Objective: This paper aims to (1) present an analysis of how sex may interact with age, marital status, and parental status to predict individual differences in sources of happiness, and (2) to present a preliminary discussion of how open datasets may contribute to the process of designing health-related technology innovations. Methods: The HappyDB is an open database of 100,535 statements of what people consider to have made them happy, with some people asking to consider the past 24 hours (49,831 statements) and some considering the last 3 months (50,704 statements). Demographic information is also provided. 
Binary logistic regression analyses are used to determine whether various groups differed in their likelihood of selecting or not selecting a category as a source of their happiness. Results: Sex and age interacted to influence what was selected as sources of happiness, with patterns being less consistent among female individuals in comparison with male individuals. For marital status, differences in sources of happiness were predominantly between married individuals and those who are divorced or separated, but these were the same for both sexes. Married, single, and widowed individuals were all largely similar in their likelihood of selecting each of the categories as a source of their happiness. However, there were some anomalies, and sex appeared to be important in these anomalies. Sex and parental status also interacted to influence what was selected as sources of happiness. Conclusions: Sex interacts with age, marital status, and parental status in the likelihood of reporting affection, bonding, leisure, achievement, or enjoying the moment as sources of happiness. The contribution of an open dataset to understanding individual differences in sources of happiness is discussed in terms of its potential role in addressing the challenges of designing DMHIs that are ethical, responsible, evidence based, acceptable, engaging, inclusive, and effective for users. The discussion considers how the content design of DMHIs in general may benefit from exploring new methods informed by diverse data sources. It is proposed that examining the extent to which insights from nondigital settings can inform requirements gathering for DMHIs is warranted. 
", doi="10.2196/65658", url="https://formative.jmir.org/2025/1/e65658" }

@Article{info:doi/10.2196/63809,
  author="Thomas, Julia and Lucht, Antonia and Segler, Jacob and Wundrack, Richard and Mich{\'e}, Marcel and Lieb, Roselind and Kuchinke, Lars and Meinlschmidt, Gunther",
  title="An Explainable Artificial Intelligence Text Classifier for Suicidality Prediction in Youth Crisis Text Line Users: Development and Validation Study",
  journal="JMIR Public Health Surveill",
  year="2025",
  month=jan,
  day="29",
  volume="11",
  pages="e63809",
  keywords="deep learning, explainable artificial intelligence (XAI), large language model (LLM), machine learning, neural network, prevention, risk monitoring, suicide, transformer model, suicidality, suicidal ideation, self-murder, self-harm, youth, adolescent, adolescents, public health, language model, language models, chat protocols, crisis helpline, help-seeking behaviors, German, Shapley, decision-making, mental health, health informatics, mobile phone",
  abstract="Background: Suicide represents a critical public health concern, and machine learning (ML) models offer the potential for identifying at-risk individuals. Recent studies using benchmark datasets and real-world social media data have demonstrated the capability of pretrained large language models in predicting suicidal ideation and behaviors (SIB) in speech and text. 
Objective: This study aimed to (1) develop and implement ML methods for predicting SIBs in a real-world crisis helpline dataset, using transformer-based pretrained models as a foundation; (2) evaluate, cross-validate, and benchmark the model against traditional text classification approaches; and (3) train an explainable model to highlight relevant risk-associated features. Methods: We analyzed chat protocols from adolescents and young adults (aged 14-25 years) seeking assistance from a German crisis helpline. An ML model was developed using a transformer-based language model architecture with pretrained weights and long short-term memory layers. The model predicted suicidal ideation (SI) and advanced suicidal engagement (ASE), as indicated by composite Columbia-Suicide Severity Rating Scale scores. We compared model performance against a classical word-vector-based ML model. We subsequently computed discrimination, calibration, clinical utility, and explainability information using a Shapley Additive Explanations value-based post hoc estimation model. Results: The dataset comprised 1348 help-seeking encounters (1011 for training and 337 for testing). The transformer-based classifier achieved a macroaveraged area under the curve (AUC) receiver operating characteristic (ROC) of 0.89 (95\% CI 0.81-0.91) and an overall accuracy of 0.79 (95\% CI 0.73-0.99). This performance surpassed the word-vector-based baseline model (AUC-ROC=0.77, 95\% CI 0.64-0.90; accuracy=0.61, 95\% CI 0.61-0.80). The transformer model demonstrated excellent prediction for nonsuicidal sessions (AUC-ROC=0.96, 95\% CI 0.96-0.99) and good prediction for SI and ASE, with AUC-ROCs of 0.85 (95\% CI 0.97-0.86) and 0.87 (95\% CI 0.81-0.88), respectively. The Brier Skill Score indicated a 44\% improvement in classification performance over the baseline model. 
The Shapley Additive Explanations model identified language features predictive of SIBs, including self-reference, negation, expressions of low self-esteem, and absolutist language. Conclusions: Neural networks using large language model--based transfer learning can accurately identify SI and ASE. The post hoc explainer model revealed language features associated with SI and ASE. Such models may potentially support clinical decision-making in suicide prevention services. Future research should explore multimodal input features and temporal aspects of suicide risk. ", doi="10.2196/63809", url="https://publichealth.jmir.org/2025/1/e63809" } @Article{info:doi/10.2196/62914, author="Scribano Parada, Paz Mar{\'i}a de la and Gonz{\'a}lez Palau, F{\'a}tima and Valladares Rodr{\'i}guez, Sonia and Rincon, Mariano and Rico Barroeta, Jos{\'e} Maria and Garc{\'i}a Rodriguez, Marta and Bueno Aguado, Yolanda and Herrero Blanco, Ana and D{\'i}az-L{\'o}pez, Estela and Bachiller Mayoral, Margarita and Losada Dur{\'a}n, Raquel", title="Preclinical Cognitive Markers of Alzheimer Disease and Early Diagnosis Using Virtual Reality and Artificial Intelligence: Literature Review", journal="JMIR Med Inform", year="2025", month="Jan", day="28", volume="13", pages="e62914", keywords="dementia", keywords="Alzheimer disease", keywords="mild cognitive impairment", keywords="virtual reality", keywords="artificial intelligence", keywords="early detection", keywords="qualitative review", keywords="literature review", keywords="AI", abstract="Background: This review explores the potential of virtual reality (VR) and artificial intelligence (AI) to identify preclinical cognitive markers of Alzheimer disease (AD). By synthesizing recent studies, it aims to advance early diagnostic methods to detect AD before significant symptoms occur. 
Objective: Research emphasizes the significance of early detection in AD during the preclinical phase, which does not involve cognitive impairment but nevertheless requires reliable biomarkers. Current biomarkers face challenges, prompting the exploration of cognitive behavior indicators beyond episodic memory. Methods: Using PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines, we searched Scopus, PubMed, and Google Scholar for studies on neuropsychiatric disorders utilizing conversational data. Results: Following an analysis of 38 selected articles, we highlight verbal episodic memory as a sensitive preclinical AD marker, with supporting evidence from neuroimaging and genetic profiling. Executive functions precede memory decline, while processing speed is a significant correlate. The potential of VR remains underexplored, and AI algorithms offer a multidimensional approach to early neurocognitive disorder diagnosis. Conclusions: Emerging technologies like VR and AI show promise for preclinical diagnostics, but thorough validation and regulation for clinical safety and efficacy are necessary. Continued technological advancements are expected to enhance early detection and management of AD. ", doi="10.2196/62914", url="https://medinform.jmir.org/2025/1/e62914" } @Article{info:doi/10.2196/65454, author="Cardamone, C. Nicholas and Olfson, Mark and Schmutte, Timothy and Ungar, Lyle and Liu, Tony and Cullen, W. Sara and Williams, J. Nathaniel and Marcus, C. 
Steven", title="Classifying Unstructured Text in Electronic Health Records for Mental Health Prediction Models: Large Language Model Evaluation Study", journal="JMIR Med Inform", year="2025", month="Jan", day="21", volume="13", pages="e65454", keywords="artificial intelligence", keywords="AI", keywords="machine learning", keywords="ML", keywords="natural language processing", keywords="NLP", keywords="large language model", keywords="LLM", keywords="ChatGPT", keywords="predictive modeling", keywords="mental health", keywords="health informatics", keywords="electronic health record", keywords="EHR", keywords="EHR system", keywords="text", keywords="dataset", keywords="mental health disorder", keywords="emergency department", keywords="physical health", abstract="Background: Prediction models have demonstrated a range of applications across medicine, including using electronic health record (EHR) data to identify hospital readmission and mortality risk. Large language models (LLMs) can transform unstructured EHR text into structured features, which can then be integrated into statistical prediction models, ensuring that the results are both clinically meaningful and interpretable. Objective: This study aims to compare the classification decisions made by clinical experts with those generated by a state-of-the-art LLM, using terms extracted from a large EHR data set of individuals with mental health disorders seen in emergency departments (EDs). Methods: Using a dataset from the EHR systems of more than 50 health care provider organizations in the United States from 2016 to 2021, we extracted all clinical terms that appeared in at least 1000 records of individuals admitted to the ED for a mental health--related problem from a source population of over 6 million ED episodes. 
Two experienced mental health clinicians (one medically trained psychiatrist and one clinical psychologist) reached consensus on the classification of EHR terms and diagnostic codes into categories. We evaluated an LLM's agreement with clinical judgment across three classification tasks as follows: (1) classify terms into ``mental health'' or ``physical health'', (2) classify mental health terms into 1 of 42 prespecified categories, and (3) classify physical health terms into 1 of 19 prespecified broad categories. Results: There was high agreement between the LLM and clinical experts when categorizing 4553 terms as ``mental health'' or ``physical health'' ($\kappa$=0.77, 95\% CI 0.75-0.80). However, there was still considerable variability in LLM-clinician agreement on the classification of mental health terms ($\kappa$=0.62, 95\% CI 0.59-0.66) and physical health terms ($\kappa$=0.69, 95\% CI 0.67-0.70). Conclusions: The LLM displayed high agreement with clinical experts when classifying EHR terms into certain mental health or physical health term categories. However, agreement with clinical experts varied considerably within both sets of mental and physical health term categories. Importantly, the use of LLMs presents an alternative to manual human coding, presenting great potential to create interpretable features for prediction models. 
", doi="10.2196/65454", url="https://medinform.jmir.org/2025/1/e65454" } @Article{info:doi/10.2196/58649, author="Zhang, Ren and Liu, Yi and Zhang, Zhiwei and Luo, Rui and Lv, Bin", title="Interpretable Machine Learning Model for Predicting Postpartum Depression: Retrospective Study", journal="JMIR Med Inform", year="2025", month="Jan", day="20", volume="13", pages="e58649", keywords="postpartum depression", keywords="machine learning", keywords="predictive model", keywords="risk factors", keywords="XGBoost", keywords="extreme gradient boosting", keywords="PPD", abstract="Background: Postpartum depression (PPD) is a prevalent mental health issue with significant impacts on mothers and families. Exploring reliable predictors is crucial for the early and accurate prediction of PPD, which remains challenging. Objective: This study aimed to comprehensively collect variables from multiple aspects, develop and validate machine learning models to achieve precise prediction of PPD, and interpret the model to reveal clinical implications. Methods: This study recruited pregnant women who delivered at the West China Second University Hospital, Sichuan University. Various variables were collected from electronic medical record data and screened using least absolute shrinkage and selection operator penalty regression. Participants were divided into training (1358/2055, 66.1\%) and validation (697/2055, 33.9\%) sets by random sampling. Machine learning--based predictive models were developed in the training cohort. Models were validated in the validation cohort with receiver operating curve and decision curve analysis. Multiple model interpretation methods were implemented to explain the optimal model. Results: We recruited 2055 participants in this study. The extreme gradient boosting model was the optimal predictive model with the area under the receiver operating curve of 0.849. 
Shapley Additive Explanation indicated that the most influential predictors of PPD were antepartum depression, lower fetal weight, elevated thyroid-stimulating hormone, declined thyroid peroxidase antibodies, elevated serum ferritin, and older age. Conclusions: This study developed and validated a machine learning--based predictive model for PPD. Several significant risk factors and how they impact the prediction of PPD were revealed. These findings provide new insights into the early screening of individuals with high risk for PPD, emphasizing the need for comprehensive screening approaches that include both physiological and psychological factors. ", doi="10.2196/58649", url="https://medinform.jmir.org/2025/1/e58649" } @Article{info:doi/10.2196/51602, author="Namatovu, Kasujja Hasifah and Magumba, Abraham Mark and Akena, Dickens", title="E-Screening for Prenatal Depression in Kampala, Uganda Using the Edinburgh Postnatal Depression Scale: Survey Results", journal="Online J Public Health Inform", year="2025", month="Jan", day="14", volume="17", pages="e51602", keywords="perinatal", keywords="prenatal", keywords="antenatal", keywords="antepartum", keywords="depression", keywords="Edinburgh Postnatal Depression Scale", abstract="Background: Perinatal depression remains a substantial public health challenge, often overlooked or incorrectly diagnosed in numerous low-income nations. Objective: The goal of this study was to establish statistical baselines for the prevalence of perinatal depression in Kampala and understand its relationship with key demographic variables. Methods: We employed an Android-based implementation of the Edinburgh Postnatal Depression Scale (EPDS) to survey 12,913 women recruited from 7 government health facilities located in Kampala, Uganda. We used the standard EPDS cutoff, which classifies women with total scores above 13 as possibly depressed and those below 13 as not depressed. 
The $\chi^2$ test of independence was used to determine the most influential categorical variables. We further analyzed the most influential categorical variable using odds ratios. For continuous variables such as age and the weeks of gestation, we performed a simple correlation analysis. Results: We found that 21.5\% (2783/12,913, 95\% CI 20.8\%-22.3\%) were possibly depressed. Respondents' relationship category was found to be the most influential variable ($\chi^2_1$=806.9, P<.001; Cramer's V=0.25), indicating a small effect size. Among quantitative variables, we found a weak negative correlation between respondents' age and the total EPDS score (r=$-$0.11, P<.001). Similarly, a weak negative correlation was also observed between the total EPDS score and the number of previous children of the respondent (r=$-$0.07, P<.001). Moreover, a weak positive correlation was noted between weeks of gestation and the total EPDS score (r=0.02, P=.05). Conclusions: This study shows that demographic factors such as spousal employment category, age, and relationship status have an influence on the respondents' EPDS scores. These variables may serve as proxies for latent factors such as financial stability and emotional support. ", doi="10.2196/51602", url="https://ojphi.jmir.org/2025/1/e51602" } @Article{info:doi/10.2196/56679, author="Zawada, J. Stephanie and Ganjizadeh, Ali and Conte, Marco Gian and Demaerschalk, M. Bart and Erickson, J. 
Bradley", title="Exploring Remote Monitoring of Poststroke Mood With Digital Sensors by Assessment of Depression Phenotypes and Accelerometer Data in UK Biobank: Cross-Sectional Analysis", journal="JMIR Neurotech", year="2025", month="Jan", day="10", volume="4", pages="e56679", keywords="depression", keywords="cerebrovascular disease", keywords="remote monitoring", keywords="stroke", keywords="accelerometers", keywords="mobile phone", abstract="Background: Interest in using digital sensors to monitor patients with prior stroke for depression, a risk factor for poor outcomes, has grown rapidly; however, little is known about behavioral phenotypes related to future mood symptoms and if patients with and without previously diagnosed depression experience similar phenotypes. Objective: This study aimed to assess the feasibility of using digital sensors to monitor mood in patients with prior stroke with a prestroke depression diagnosis (DD) and controls. We examined relationships between physical activity behaviors and self-reported depression frequency. Methods: In the UK Biobank wearable accelerometer cohort, we retrospectively identified patients who had previously suffered a stroke (N=1603) and conducted cross-sectional analyses with those who completed a subsequent depression survey follow-up. Sensitivity analyses assessed a general population cohort excluding previous stroke participants and 2 incident cohorts: incident stroke (IS) and incident cerebrovascular disease (IC). Results: In controls, the odds of being in a higher depressed mood frequency category decreased by 23\% for each minute spent in moderate-to-vigorous physical activity (odds ratio 0.77, 95\% CI 0.69-0.87; P<.001). This association persisted in both general cohorts and in the IC control cohort. Conclusions: Although moderate-to-vigorous physical activity was linked with less frequent depressed mood in patients with prior stroke without DD, this finding did not persist in DDs. 
Thus, accelerometer-mood monitoring may provide clinically useful insights about future mood in patients with prior stroke without DDs. Considering the finding in the IC cohort and the lack of findings in the IS cohorts, accelerometer-mood monitoring may also be appropriately applied to observing broader cerebrovascular disease pathogenesis. ", doi="10.2196/56679", url="https://neuro.jmir.org/2025/1/e56679" } @Article{info:doi/10.2196/57624, author="Epperson, Neill C. and Davis, Rachel and Dempsey, Allison and Haller, C. Heinrich and Kupfer, J. David and Love, Tiffany and Villarreal, M. Pamela and Matthews, Mark and Moore, L. Susan and Muller, Kimberly and Schneck, D. Christopher and Scott, L. Jessica and Zane, D. Richard and Frank, Ellen", title="The Trifecta of Industry, Academic, and Health System Partnership to Improve Mental Health Care Through Smartphone-Based Remote Patient Monitoring: Development and Usability Study", journal="JMIR Form Res", year="2025", month="Jan", day="7", volume="9", pages="e57624", keywords="digital health", keywords="mobile intervention", keywords="telepsychiatry", keywords="artificial intelligence", keywords="psychiatry", keywords="mental health", keywords="depression", keywords="mood", keywords="bipolar", keywords="monitor", keywords="diagnostic tool", keywords="diagnosis", keywords="electronic health record", keywords="EHR", keywords="alert", keywords="notification", keywords="prediction", keywords="mHealth", keywords="mobile health", keywords="smartphone", keywords="passive", keywords="self-reported", keywords="patient generated", abstract="Background: Mental health treatment is hindered by the limited number of mental health care providers and the infrequency of care. Digital mental health technology can help supplement treatment by remotely monitoring patient symptoms and predicting mental health crises in between clinical visits. 
However, the feasibility of digital mental health technologies has not yet been sufficiently explored. Rhythms, from the company Health Rhythms, is a smartphone platform that uses passively acquired smartphone data with artificial intelligence and predictive analytics to alert patients and providers to an emerging mental health crisis. Objective: The objective of this study was to test the feasibility and acceptability of Rhythms among patients attending an academic psychiatric outpatient clinic. Methods: Our group embedded Rhythms into the electronic health record of a large health system. Patients with a diagnosis of major depressive disorder, bipolar disorder, or other mood disorder were contacted online and enrolled for a 6-week trial of Rhythms. Participants provided data by completing electronic surveys as well as by active and passive use of Rhythms. Emergent and urgent alerts were monitored and managed according to passively collected data and patient self-ratings. A purposively sampled group of participants also participated in qualitative interviews about their experience with Rhythms at the end of the study. Results: Of the 104 participants, 89 (85.6\%) completed 6 weeks of monitoring. The majority of the participants were women (72/104, 69.2\%), White (84/104, 80.8\%), and non-Hispanic (100/104, 96.2\%) and had a diagnosis of major depressive disorder (71/104, 68.3\%). Two emergent alerts and 19 urgent alerts were received and managed according to protocol over 16 weeks. More than two-thirds (63/87, 72\%) of those participating continued to use Rhythms after study completion. Comments from participants indicated appreciation for greater self-awareness and provider connection, while providers reported that Rhythms provided a more nuanced understanding of patient experience between clinical visits. 
Conclusions: Rhythms is a user-friendly, electronic health record--adaptable, smartphone-based tool that provides patients and providers with a greater understanding of patient mental health status. Integration of Rhythms into health systems has the potential to facilitate mental health care and improve the experience of both patients and providers. ", doi="10.2196/57624", url="https://formative.jmir.org/2025/1/e57624" } @Article{info:doi/10.2196/63034, author="Tartaglia, Julia and Jaghab, Brendan and Ismail, Mohamed and H{\"a}nsel, Katrin and Meter, Van Anna and Kirschenbaum, Michael and Sobolev, Michael and Kane, M. John and Tang, X. Sunny", title="Assessing Health Technology Literacy and Attitudes of Patients in an Urban Outpatient Psychiatry Clinic: Cross-Sectional Survey Study", journal="JMIR Ment Health", year="2024", month="Dec", day="30", volume="11", pages="e63034", keywords="digital literacy", keywords="attitudes", keywords="mental health", keywords="digital health technology", keywords="cluster analysis", keywords="psychiatry", keywords="mobile phone", abstract="Background: Digital health technologies are increasingly being integrated into mental health care. However, the adoption of these technologies can be influenced by patients' digital literacy and attitudes, which may vary based on sociodemographic factors. This variability necessitates a better understanding of patient digital literacy and attitudes to prevent a digital divide, which can worsen existing health care disparities. Objective: This study aimed to assess digital literacy and attitudes toward digital health technologies among a diverse psychiatric outpatient population. In addition, the study sought to identify clusters of patients based on their digital literacy and attitudes, and to compare sociodemographic characteristics among these clusters. Methods: A survey was distributed to adult psychiatric patients with various diagnoses in an urban outpatient psychiatry program. 
The survey included a demographic questionnaire, a digital literacy questionnaire, and a digital health attitudes questionnaire. Multiple linear regression analyses were used to identify predictors of digital literacy and attitudes. Cluster analysis was performed to categorize patients based on their responses. Pairwise comparisons and one-way ANOVA were conducted to analyze differences between clusters. Results: A total of 256 patients were included in the analysis. The mean age of participants was 32 (SD 12.6, range 16-70) years. The sample was racially and ethnically diverse: White (100/256, 38.9\%), Black (39/256, 15.2\%), Latinx (44/256, 17.2\%), Asian (59/256, 23\%), and other races and ethnicities (15/256, 5.7\%). Digital literacy was high for technologies such as smartphones, videoconferencing, and social media (items with >75\%, 193/256 of participants reporting at least some use) but lower for health apps, mental health apps, wearables, and virtual reality (items with <42\%, 108/256 reporting at least some use). Attitudes toward using technology in clinical care were generally positive (9 out of 10 items received >75\% positive score), particularly for communication with providers and health data sharing. Older age (P<.001) and lower educational attainment (P<.001) negatively predicted digital literacy scores, but no demographic variables predicted attitude scores. Cluster analysis identified 3 patient groups. Relative to the other clusters, cluster 1 (n=30) had lower digital literacy and intermediate acceptance of digital technology. Cluster 2 (n=50) had higher literacy and lower acceptance. Cluster 3 (n=176) displayed both higher literacy and acceptance. Significant between-cluster differences were observed in mean age and education level between clusters (P<.001), with cluster 1 participants being older and having lower levels of formal education. 
Conclusions: High digital literacy and acceptance of digital technologies were observed among our patients, indicating a generally positive outlook for digital health clinics. Our results also found that patients of older age and lower formal levels of educational attainment had lower digital literacy, highlighting the need for targeted interventions to support those who may struggle with adopting digital health tools. ", doi="10.2196/63034", url="https://mental.jmir.org/2024/1/e63034" } @Article{info:doi/10.2196/60003, author="Benouis, Mohamed and Andre, Elisabeth and Can, Said Yekta", title="Balancing Between Privacy and Utility for Affect Recognition Using Multitask Learning in Differential Privacy--Added Federated Learning Settings: Quantitative Study", journal="JMIR Ment Health", year="2024", month="Dec", day="23", volume="11", pages="e60003", keywords="privacy preservation", keywords="multitask learning", keywords="federated learning", keywords="privacy", keywords="physiological signals", keywords="affective computing", keywords="wearable sensors", keywords="sensitive data", keywords="empathetic sensors", keywords="data privacy", keywords="digital mental health", keywords="wearables", keywords="ethics", keywords="emotional well-being", abstract="Background: The rise of wearable sensors marks a significant development in the era of affective computing. Their popularity is continuously increasing, and they have the potential to improve our understanding of human stress. A fundamental aspect within this domain is the ability to recognize perceived stress through these unobtrusive devices. Objective: This study aims to enhance the performance of emotion recognition using multitask learning (MTL), a technique extensively explored across various machine learning tasks, including affective computing. 
By leveraging the shared information among related tasks, we seek to augment the accuracy of emotion recognition while confronting the privacy threats inherent in the physiological data captured by these sensors. Methods: To address the privacy concerns associated with the sensitive data collected by wearable sensors, we proposed a novel framework that integrates differential privacy and federated learning approaches with MTL. This framework was designed to efficiently identify mental stress while preserving private identity information. Through this approach, we aimed to enhance the performance of emotion recognition tasks while preserving user privacy. Results: Comprehensive evaluations of our framework were conducted using 2 prominent public datasets. The results demonstrate a significant improvement in emotion recognition accuracy, achieving a rate of 90\%. Furthermore, our approach effectively mitigates privacy risks, as evidenced by limiting reidentification accuracies to 47\%. Conclusions: This study presents a promising approach to advancing emotion recognition capabilities while addressing privacy concerns in the context of empathetic sensors. By integrating MTL with differential privacy and federated learning, we have demonstrated the potential to achieve high levels of accuracy in emotion recognition while ensuring the protection of user privacy. This research contributes to the ongoing efforts to use affective computing in a privacy-aware and ethical manner. ", doi="10.2196/60003", url="https://mental.jmir.org/2024/1/e60003" } @Article{info:doi/10.2196/65506, author="Thimmapuram, Jayaram and Patel, D. Kamlesh and Bhatt, Deepti and Chauhan, Ajay and Madhusudhan, Divya and Bhatt, K. 
Kashyap and Deshpande, Snehal and Budhbhatti, Urvi and Joshi, Chaitanya", title="Effect of a Web-Based Heartfulness Program on the Mental Well-Being, Biomarkers, and Gene Expression Profile of Health Care Students: Randomized Controlled Trial", journal="JMIR Bioinform Biotech", year="2024", month="Dec", day="16", volume="5", pages="e65506", keywords="heartfulness", keywords="meditation", keywords="stress", keywords="anxiety", keywords="depression", keywords="interleukins", keywords="gene expression", keywords="dehydroepiandrosterone", keywords="DHEA", keywords="gene", keywords="mental health", keywords="randomized study", keywords="web-based program", keywords="mental well-being", keywords="well-being", keywords="mental", keywords="health care students", keywords="student", keywords="mRNA", keywords="messenger ribonucleic acid", keywords="youth", keywords="young adults", keywords="web-based", keywords="biomarker", keywords="RNA", keywords="bioinformatics", keywords="randomized", keywords="statistical analysis", keywords="nursing", keywords="physiotherapy", keywords="pharmacy", abstract="Background: Health care students often experience high levels of stress, anxiety, and mental health issues, making it crucial to address these challenges. Variations in stress levels may be associated with changes in dehydroepiandrosterone sulfate (DHEA-S) and interleukin-6 (IL-6) levels and gene expression. Meditative practices have demonstrated effectiveness in reducing stress and improving mental well-being. Objective: This study aims to assess the effects of Heartfulness meditation on mental well-being, DHEA-S, IL-6, and gene expression profile. Methods: The 78 enrolled participants were randomly assigned to the Heartfulness meditation (n=42, 54\%) and control (n=36, 46\%) groups. The participants completed the Perceived Stress Scale (PSS) and Depression Anxiety Stress Scale (DASS-21) at baseline and after week 12. 
Gene expression with messenger RNA sequencing and DHEA-S and IL-6 levels were also measured at baseline and the completion of the 12 weeks. Statistical analysis included descriptive statistics, paired t test, and 1-way ANOVA with Bonferroni correction. Results: The Heartfulness group exhibited a significant 17.35\% reduction in PSS score (from mean 19.71, SD 5.09 to mean 16.29, SD 4.83; P<.001) compared to a nonsignificant 6\% reduction in the control group (P=.31). DASS-21 scores decreased significantly by 27.14\% in the Heartfulness group (from mean 21.15, SD 9.56 to mean 15.41, SD 7.87; P<.001) while it increased nonsignificantly by 17\% in the control group (P=.04). For the DASS-21 subcomponents---the Heartfulness group showed a statistically significant 28.53\% reduction in anxiety (P=.006) and 27.38\% reduction in stress (P=.002) versus an insignificant 22\% increase in anxiety (P=.02) and 6\% increase in stress (P=.47) in the control group. Further, DHEA-S levels showed a significant 20.27\% increase in the Heartfulness group (from mean 251.71, SD 80.98 to mean 302.74, SD 123.56; P=.002) compared to an insignificant 9\% increase in the control group (from mean 285.33, SD 112.14 to mean 309.90, SD 136.90; P=.10). IL-6 levels showed a statistically significant difference in both the groups (from mean 4.93, SD 1.35 to mean 3.67, SD 1.0; 28.6\%; P<.001 [Heartfulness group] and from mean 4.52, SD 1.40 to mean 2.72, SD 1.74; 40\%; P<.001 [control group]). Notably, group comparison at 12 weeks revealed a significant difference in perceived stress, DASS-21 and its subcomponents, and IL-6 (all P<.05/4). The gene expression profile with messenger RNA sequencing identified 875 upregulated genes and 1539 downregulated genes in the Heartfulness group compared to baseline, and there were 292 upregulated genes and 1180 downregulated genes in the Heartfulness group compared to the control group after the intervention. 
Conclusions: Heartfulness practice was associated with decreased depression, anxiety, and stress scores and improved health measures in DHEA-S and IL-6 levels. The gene expression data point toward possible mechanisms of alleviation of symptoms of stress, anxiety and depression. Trial Registration: ISRCTN Registry ISRCTN82860715; https://doi.org/10.1186/ISRCTN82860715 ", doi="10.2196/65506", url="https://bioinform.jmir.org/2024/1/e65506", url="http://www.ncbi.nlm.nih.gov/pubmed/39680432" } @Article{info:doi/10.2196/55856, author="Georgescu, Livia Alexandra and Cummins, Nicholas and Molimpakis, Emilia and Giacomazzi, Eduardo and Rodrigues Marczyk, Joana and Goria, Stefano", title="Screening for Depression and Anxiety Using a Nonverbal Working Memory Task in a Sample of Older Brazilians: Observational Study of Preliminary Artificial Intelligence Model Transferability", journal="JMIR Form Res", year="2024", month="Dec", day="12", volume="8", pages="e55856", keywords="depression", keywords="anxiety", keywords="Brazil", keywords="machine learning", keywords="n-back", keywords="working memory", keywords="artificial intelligence", keywords="gerontology", keywords="older adults", keywords="mental health", keywords="AI", keywords="transferability", keywords="detection", keywords="screening", keywords="questionnaire", keywords="longitudinal study", abstract="Background: Anxiety and depression represent prevalent yet frequently undetected mental health concerns within the older population. The challenge of identifying these conditions presents an opportunity for artificial intelligence (AI)--driven, remotely available, tools capable of screening and monitoring mental health. A critical criterion for such tools is their cultural adaptability to ensure effectiveness across diverse populations. Objective: This study aims to illustrate the preliminary transferability of two established AI models designed to detect high depression and anxiety symptom scores. 
The models were initially trained on data from a nonverbal working memory game (1- and 2-back tasks) in a dataset by thymia, a company that develops AI solutions for mental health and well-being assessments, encompassing over 6000 participants from the United Kingdom, United States, Mexico, Spain, and Indonesia. We seek to validate the models' performance by applying it to a new dataset comprising older Brazilian adults, thereby exploring its transferability and generalizability across different demographics and cultures. Methods: A total of 69 Brazilian participants aged 51-92 years old were recruited with the help of La{\c{c}}os Sa{\'u}de, a company specializing in nurse-led, holistic home care. Participants received a link to the thymia dashboard every Monday and Thursday for 6 months. The dashboard had a set of activities assigned to them that would take 10-15 minutes to complete, which included a 5-minute game with two levels of the n-back tasks. Two Random Forest models trained on thymia data to classify depression and anxiety based on thresholds defined by scores of the Patient Health Questionnaire (8 items) (PHQ-8) $\geq$10 and those of the Generalized Anxiety Disorder Assessment (7 items) (GAD-7) $\geq$10, respectively, were subsequently tested on the La{\c{c}}os Sa{\'u}de patient cohort. Results: The depression classification model exhibited robust performance, achieving an area under the receiver operating characteristic curve (AUC) of 0.78, a specificity of 0.69, and a sensitivity of 0.72. The anxiety classification model showed an initial AUC of 0.63, with a specificity of 0.58 and a sensitivity of 0.64. This performance surpassed a benchmark model using only age and gender, which had AUCs of 0.47 for PHQ-8 and 0.53 for GAD-7. After recomputing the AUC scores on a cross-sectional subset of the data (the first n-back game session), we found AUCs of 0.79 for PHQ-8 and 0.76 for GAD-7. 
Conclusions: This study successfully demonstrates the preliminary transferability of two AI models trained on a nonverbal working memory task, one for depression and the other for anxiety classification, to a novel sample of older Brazilian adults. Future research could seek to replicate these findings in larger samples and other cultural contexts. Trial Registration: ISRCTN Registry ISRCTN90727704; https://www.isrctn.com/ISRCTN90727704 ", doi="10.2196/55856", url="https://formative.jmir.org/2024/1/e55856" } @Article{info:doi/10.2196/56874, author="Ik{\"a}heimonen, Arsi and Luong, Nguyen and Baryshnikov, Ilya and Darst, Richard and Heikkil{\"a}, Roope and Holmen, Joel and Martikkala, Annasofia and Riihim{\"a}ki, Kirsi and Saleva, Outi and Isomets{\"a}, Erkki and Aledavood, Talayeh", title="Predicting and Monitoring Symptoms in Patients Diagnosed With Depression Using Smartphone Data: Observational Study", journal="J Med Internet Res", year="2024", month="Dec", day="3", volume="26", pages="e56874", keywords="data analysis", keywords="digital phenotyping", keywords="digital behavioral data", keywords="depression symptoms", keywords="depression monitoring", keywords="mHealth", keywords="mobile health", keywords="smartphone", keywords="mobile phone", abstract="Background: Clinical diagnostic assessments and the outcome monitoring of patients with depression rely predominantly on interviews by professionals and the use of self-report questionnaires. The ubiquity of smartphones and other personal consumer devices has prompted research into the potential of data collected via these devices to serve as digital behavioral markers for indicating the presence and monitoring of the outcome of depression. Objective: This paper explores the potential of using behavioral data collected with smartphones to detect and monitor depression symptoms in patients diagnosed with depression. 
Specifically, it investigates whether this data can accurately classify the presence of depression, as well as monitor the changes in depressive states over time. Methods: In a prospective cohort study, we collected smartphone behavioral data for up to 1 year. The study consists of observations from 164 participants, including healthy controls (n=31) and patients diagnosed with various depressive disorders: major depressive disorder (MDD; n=85), MDD with comorbid borderline personality disorder (n=27), and major depressive episodes with bipolar disorder (n=21). Data were labeled based on depression severity using 9-item Patient Health Questionnaire (PHQ-9) scores. We performed statistical analysis and used supervised machine learning on the data to classify the severity of depression and observe changes in the depression state over time. Results: Our correlation analysis revealed 32 behavioral markers associated with the changes in depressive state. Our analysis classified patients who are depressed with an accuracy of 82\% (95\% CI 80\%-84\%) and change in the presence of depression with an accuracy of 75\% (95\% CI 72\%-76\%). Notably, the most important smartphone features for classifying depression states were screen-off events, battery charge levels, communication patterns, app usage, and location data. Similarly, for predicting changes in depression state, the most important features were related to location, battery level, screen, and accelerometer data patterns. Conclusions: The use of smartphone digital behavioral markers to supplement clinical evaluations may aid in detecting the presence and changes in severity of symptoms of depression, particularly if combined with intermittent use of self-report of symptoms. ", doi="10.2196/56874", url="https://www.jmir.org/2024/1/e56874" } @Article{info:doi/10.2196/54966, author="de Boer, Kathleen and Mackelprang, L. 
Jessica and Nedeljkovic, Maja and Meyer, Denny and Iyer, Ravi", title="Using Artificial Intelligence to Detect Risk of Family Violence: Protocol for a Systematic Review and Meta-Analysis", journal="JMIR Res Protoc", year="2024", month="Dec", day="2", volume="13", pages="e54966", keywords="family violence", keywords="artificial intelligence", keywords="natural language processing", keywords="voice signal characteristics", keywords="public health", keywords="behaviors", keywords="research literature", keywords="policy", keywords="prevalence", keywords="detection", keywords="social policy", keywords="prevention", keywords="machine learning", keywords="mental health", keywords="suicide risk", keywords="psychological distress", abstract="Background: Despite the implementation of prevention strategies, family violence continues to be a prevalent issue worldwide. Current strategies to reduce family violence have demonstrated mixed success and innovative approaches are needed urgently to prevent the occurrence of family violence. Incorporating artificial intelligence (AI) into prevention strategies is gaining research attention, particularly the use of textual or voice signal data to detect individuals at risk of perpetrating family violence. However, no review to date has collated extant research regarding how accurate AI is at identifying individuals who are at risk of perpetrating family violence. Objective: The primary aim of this systematic review and meta-analysis is to assess the accuracy of AI models in differentiating between individuals at risk of engaging in family violence versus those who are not using textual or voice signal data. Methods: The following databases will be searched from conception to the search date: IEEE Xplore, PubMed, PsycINFO, EBSCOhost (Psychology and Behavioral Sciences collection), and Computers and Applied Sciences Complete. ProQuest Dissertations and Theses A\&I will also be used to search the grey literature. 
Studies will be included if they report on human adults and use machine learning to differentiate between low and high risk of family violence perpetration. Studies may use voice signal data or linguistic (textual) data and must report levels of accuracy in determining risk. In the data screening and full-text review and quality analysis phases, 2 researchers will review the search results and discrepancies and decisions will be resolved through masked review of a third researcher. Results will be reported in a narrative synthesis. In addition, a random effects meta-analysis will be conducted using the area under the receiver operating curve reported in the included studies, assuming sufficient eligible studies are identified. Methodological quality of included studies will be assessed using the risk of bias tool in nonrandomized studies of interventions. Results: As of October 2024, the search has not commenced. The review will document the state of the research concerning the accuracy of AI models in detecting the risk of family violence perpetration using textual or voice signal data. Results will be presented in the form of a narrative synthesis. Results of the meta-analysis will be summarized in tabular form and using a forest plot. Conclusions: The findings from this study will clarify the state of the literature on the accuracy of machine learning models to identify individuals who are at high risk of perpetuating family violence. Findings may be used to inform the development of AI and machine learning models that can be used to support possible prevention strategies. 
Trial Registration: PROSPERO CRD42023481174; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=481174 International Registered Report Identifier (IRRID): PRR1-10.2196/54966 ", doi="10.2196/54966", url="https://www.researchprotocols.org/2024/1/e54966", url="http://www.ncbi.nlm.nih.gov/pubmed/39621402" } @Article{info:doi/10.2196/58927, author="Liu, Zhongling and Li, Jinkai and Zhang, Yuanyuan and Wu, Dan and Huo, Yanyan and Yang, Jianxin and Zhang, Musen and Dong, Chuanfei and Jiang, Luhui and Sun, Ruohan and Zhou, Ruoyin and Li, Fei and Yu, Xiaodan and Zhu, Daqian and Guo, Yao and Chen, Jinjin", title="Auxiliary Diagnosis of Children With Attention-Deficit/Hyperactivity Disorder Using Eye-Tracking and Digital Biomarkers: Case-Control Study", journal="JMIR Mhealth Uhealth", year="2024", month="Nov", day="29", volume="12", pages="e58927", keywords="attention deficit disorder with hyperactivity", keywords="eye-tracking", keywords="auxiliary diagnosis", keywords="digital biomarker", keywords="antisaccade", keywords="machine learning", abstract="Background: Attention-deficit/hyperactivity disorder (ADHD) is a common neurodevelopmental disorder in school-aged children. The lack of objective biomarkers for ADHD often results in missed diagnoses or misdiagnoses, which lead to inappropriate or delayed interventions. Eye-tracking technology provides an objective method to assess children's neuropsychological behavior. Objective: The aim of this study was to develop an objective and reliable auxiliary diagnostic system for ADHD using eye-tracking technology. This system would be valuable for screening for ADHD in schools and communities and may help identify objective biomarkers for the clinical diagnosis of ADHD. Methods: We conducted a case-control study of children with ADHD and typically developing (TD) children. 
We designed an eye-tracking assessment paradigm based on the core cognitive deficits of ADHD and extracted various digital biomarkers that represented participant behaviors. These biomarkers and developmental patterns were compared between the ADHD and TD groups. Machine learning (ML) was implemented to validate the ability of the extracted eye-tracking biomarkers to predict ADHD. The performance of the ML models was evaluated using 5-fold cross-validation. Results: We recruited 216 participants, of whom 94 (43.5\%) were children with ADHD and 122 (56.5\%) were TD children. The ADHD group showed significantly poorer performance (for accuracy and completion time) than the TD group in the prosaccade, antisaccade, and delayed saccade tasks. In addition, there were substantial group differences in digital biomarkers, such as pupil diameter fluctuation, regularity of gaze trajectory, and fixations on unrelated areas. Although the accuracy and task completion speed of the ADHD group increased over time, their eye-movement patterns remained irregular. The TD group with children aged 5 to 6 years outperformed the ADHD group with children aged 9 to 10 years, and this difference remained relatively stable over time, which indicated that the ADHD group followed a unique developmental pattern. The ML model was effective in discriminating the groups, achieving an area under the curve of 0.965 and an accuracy of 0.908. Conclusions: The eye-tracking biomarkers proposed in this study effectively identified differences in various aspects of eye-movement patterns between the ADHD and TD groups. In addition, the ML model constructed using these digital biomarkers achieved high accuracy and reliability in identifying ADHD. Our system can facilitate early screening for ADHD in schools and communities and provide clinicians with objective biomarkers as a reference. 
", doi="10.2196/58927", url="https://mhealth.jmir.org/2024/1/e58927", url="http://www.ncbi.nlm.nih.gov/pubmed/39477792" } @Article{info:doi/10.2196/54176, author="Bonnin, Gabriel and Kr{\"o}ber, Svea and Schneider, Silvia and Margraf, J{\"u}rgen and Pflug, Verena and Gerlach, L. Alexander and Slotta, Timo and Christiansen, Hanna and Albrecht, Bj{\"o}rn and Chavanon, Mira-Lynn and Hirschfeld, Gerrit and In-Albon, Tina and Thielsch, T. Meinald and von Brachel, Ruth", title="A Blended Learning Course on the Diagnostics of Mental Disorders: Multicenter Cluster Randomized Noninferiority Trial", journal="J Med Internet Res", year="2024", month="Nov", day="27", volume="26", pages="e54176", keywords="diagnosis", keywords="structured clinical interviews", keywords="blended learning", keywords="dissemination", keywords="therapist training", keywords="clinical interview", keywords="clinical diagnosis", keywords="clinical practice", keywords="psychology students", keywords="diagnostic test", keywords="health personnel", keywords="mental health services", keywords="mental health", abstract="Background: Clinical diagnoses determine if and how therapists treat their patients. As misdiagnoses can have severe adverse effects, disseminating evidence-based diagnostic skills into clinical practice is highly important. Objective: This study aimed to develop and evaluate a blended learning course in a multicenter cluster randomized controlled trial. Methods: Undergraduate psychology students (N=350) enrolled in 18 university courses at 3 universities. The courses were randomly assigned to blended learning or traditional synchronous teaching. The primary outcome was the participants' performances in a clinical diagnostic interview after the courses. The secondary outcomes were diagnostic knowledge and participants' reactions to the courses. All outcomes were analyzed on the individual participant level using noninferiority testing. 
Results: Compared with the synchronous course (74.6\% pass rate), participation in the blended learning course (89\% pass rate) increased the likelihood of successfully passing the behavioral test (odds ratio 2.77, 95\% CI 1.55-5.13), indicating not only noninferiority but superiority of the blended learning course. Furthermore, superiority of the blended learning over the synchronous course could be found regarding diagnostic knowledge ($\beta$=.13, 95\% CI 0.01-0.26), course clarity ($\beta$=.40, 95\% CI 0.27-0.53), course structure ($\beta$=.18, 95\% CI 0.04-0.32), and informativeness ($\beta$=.19, 95\% CI 0.06-0.32). Conclusions: Blended learning can help to improve the diagnostic skills and knowledge of (future) clinicians and thus make an important contribution to improving mental health care. Trial Registration: ClinicalTrials.gov NCT05294094; https://clinicaltrials.gov/study/NCT05294094 ", doi="10.2196/54176", url="https://www.jmir.org/2024/1/e54176" }

% Ding et al., JMIR Form Res 2024 (e60453). Normalised from the publisher export:
%  - given names restored to "First M." order (export had e.g. "Shih, C. Ludy") -- NOTE(review): confirm against the published byline;
%  - repeated keywords fields merged into one list (BibTeX keeps only the first of a duplicated field);
%  - lower bound of the polarity range typeset with a math minus instead of an en dash.
@Article{info:doi/10.2196/60453,
  author="Ding, Huitong and Gifford, Katherine and Shih, Ludy C. and Ho, Kristi and Rahman, Salman and Igwe, Akwaugo and Low, Spencer and Popp, Zachary and Searls, Edward and Li, Zexu and Madan, Sanskruti and Burk, Alexa and Hwang, Phillip H. and De Anda-Duran, Ileana and Kolachalama, Vijaya B. and Au, Rhoda and Lin, Honghuang",
  title="Exploring the Perspectives of Older Adults on a Digital Brain Health Platform Using Natural Language Processing: Cohort Study",
  journal="JMIR Form Res",
  year="2024",
  month="Nov",
  day="18",
  volume="8",
  pages="e60453",
  keywords="digital brain health, older adults, perspectives, semistructured interviews, natural language processing, mobile phone",
  abstract="Background: Although digital technology represents a growing field aiming to revolutionize early Alzheimer disease risk prediction and monitoring, the perspectives of older adults on an integrated digital brain health platform have not been investigated. Objective: This study aims to understand the perspectives of older adults on a digital brain health platform by conducting semistructured interviews and analyzing their transcriptions by natural language processing. Methods: The study included 28 participants from the Boston University Alzheimer's Disease Research Center, all of whom engaged with a digital brain health platform over an initial assessment period of 14 days. Semistructured interviews were conducted to collect data on participants' experiences with the digital brain health platform. The transcripts generated from these interviews were analyzed using natural language processing techniques. The frequency of positive and negative terms was evaluated through word count analysis. A sentiment analysis was used to measure the emotional tone and subjective perceptions of the participants toward the digital platform. Results: Word count analysis revealed a generally positive sentiment toward the digital platform, with ``like,'' ``well,'' and ``good'' being the most frequently mentioned positive terms. However, terms such as ``problem'' and ``hard'' indicated certain challenges faced by participants. Sentiment analysis showed a slightly positive attitude with a median polarity score of 0.13 (IQR 0.08-0.15), ranging from $-$1 (completely negative) to 1 (completely positive), and a median subjectivity score of 0.51 (IQR 0.47-0.53), ranging from 0 (completely objective) to 1 (completely subjective). These results suggested an overall positive attitude among the study cohort. Conclusions: The study highlights the importance of understanding older adults' attitudes toward digital health platforms amid the comprehensive evolution of the digitalization era. Future research should focus on refining digital solutions to meet the specific needs of older adults, fostering a more personalized approach to brain health.",
  doi="10.2196/60453",
  url="https://formative.jmir.org/2024/1/e60453"
}

@Article{info:doi/10.2196/59225, author="Owen, David and Lynham, J. Amy and Smart, E. Sophie and Pardi{\~n}as, F. Antonio and Camacho Collados, Jose", title="AI for Analyzing Mental Health Disorders Among Social Media Users: Quarter-Century Narrative Review of Progress and Challenges", journal="J Med Internet Res", year="2024", month="Nov", day="15", volume="26", pages="e59225", keywords="mental health", keywords="depression", keywords="anxiety", keywords="schizophrenia", keywords="social media", keywords="natural language processing", keywords="narrative review", abstract="Background: Mental health disorders are currently the main contributor to poor quality of life and years lived with disability. Symptoms common to many mental health disorders lead to impairments or changes in the use of language, which are observable in the routine use of social media. Detection of these linguistic cues has been explored throughout the last quarter century, but interest and methodological development have burgeoned following the COVID-19 pandemic. The next decade may see the development of reliable methods for predicting mental health status using social media data. 
This might have implications for clinical practice and public health policy, particularly in the context of early intervention in mental health care. Objective: This study aims to examine the state of the art in methods for predicting mental health statuses of social media users. Our focus is the development of artificial intelligence--driven methods, particularly natural language processing, for analyzing large volumes of written text. This study details constraints affecting research in this area. These include the dearth of high-quality public datasets for methodological benchmarking and the need to adopt ethical and privacy frameworks acknowledging the stigma experienced by those with a mental illness. Methods: A Google Scholar search yielded peer-reviewed articles dated between 1999 and 2024. We manually grouped the articles by 4 primary areas of interest: datasets on social media and mental health, methods for predicting mental health status, longitudinal analyses of mental health, and ethical aspects of the data and analysis of mental health. Selected articles from these groups formed our narrative review. Results: Larger datasets with precise dates of participants' diagnoses are needed to support the development of methods for predicting mental health status, particularly in severe disorders such as schizophrenia. Inviting users to donate their social media data for research purposes could help overcome widespread ethical and privacy concerns. In any event, multimodal methods for predicting mental health status appear likely to provide advancements that may not be achievable using natural language processing alone. Conclusions: Multimodal methods for predicting mental health status from voice, image, and video-based social media data need to be further developed before they may be considered for adoption in health care, medical support, or as consumer-facing products. 
Such methods are likely to garner greater public confidence in their efficacy than those that rely on text alone. To achieve this, more high-quality social media datasets need to be made available and privacy concerns regarding the use of these data must be formally addressed. A social media platform feature that invites users to share their data upon publication is a possible solution. Finally, a review of literature studying the effects of social media use on a user's depression and anxiety is merited. ", doi="10.2196/59225", url="https://www.jmir.org/2024/1/e59225" } @Article{info:doi/10.2196/62752, author="Hudon, Alexandre and Beaudoin, M{\'e}lissa and Phraxayavong, Kingsada and Potvin, St{\'e}phane and Dumais, Alexandre", title="Exploring the Intersection of Schizophrenia, Machine Learning, and Genomics: Scoping Review", journal="JMIR Bioinform Biotech", year="2024", month="Nov", day="15", volume="5", pages="e62752", keywords="schizophrenia", keywords="genomic data", keywords="machine learning", keywords="artificial intelligence", keywords="classification techniques", keywords="psychiatry", keywords="mental health", keywords="genomics", keywords="predictions", keywords="ML", keywords="psychiatric", keywords="synthesis", keywords="review methods", keywords="searches", keywords="scoping review", keywords="prediction models", abstract="Background: An increasing body of literature highlights the integration of machine learning with genomic data in psychiatry, particularly for complex mental health disorders such as schizophrenia. These advanced techniques offer promising potential for uncovering various facets of these disorders. A comprehensive review of the current applications of machine learning in conjunction with genomic data within this context can significantly enhance our understanding of the current state of research and its future directions. 
Objective: This study aims to conduct a systematic scoping review of the use of machine learning algorithms with genomic data in the field of schizophrenia. Methods: To conduct a systematic scoping review, a search was performed in the electronic databases MEDLINE, Web of Science, PsycNet (PsycINFO), and Google Scholar from 2013 to 2024. Studies at the intersection of schizophrenia, genomic data, and machine learning were evaluated. Results: The literature search identified 2437 eligible articles after removing duplicates. Following abstract screening, 143 full-text articles were assessed, and 121 were subsequently excluded. Therefore, 21 studies were thoroughly assessed. Various machine learning algorithms were used in the identified studies, with support vector machines being the most common. The studies notably used genomic data to predict schizophrenia, identify schizophrenia features, discover drugs, classify schizophrenia amongst other mental health disorders, and predict the quality of life of patients. Conclusions: Several high-quality studies were identified. Yet, the application of machine learning with genomic data in the context of schizophrenia remains limited. Future research is essential to further evaluate the portability of these models and to explore their potential clinical applications. 
", doi="10.2196/62752", url="https://bioinform.jmir.org/2024/1/e62752", url="http://www.ncbi.nlm.nih.gov/pubmed/39546776" } @Article{info:doi/10.2196/65994, author="Hong, Minseok and Kang, Ri-Ra and Yang, Hun Jeong and Rhee, Jin Sang and Lee, Hyunju and Kim, Yong-gyom and Lee, KangYoon and Kim, HongGi and Lee, Sang Yu and Youn, Tak and Kim, Hyun Se and Ahn, Min Yong", title="Comprehensive Symptom Prediction in Inpatients With Acute Psychiatric Disorders Using Wearable-Based Deep Learning Models: Development and Validation Study", journal="J Med Internet Res", year="2024", month="Nov", day="13", volume="26", pages="e65994", keywords="digital phenotype", keywords="mental health monitoring", keywords="smart hospital", keywords="clinical decision support system", keywords="multitask learning", keywords="wearable sensor", keywords="local validation", keywords="mental health facility", keywords="deep learning", abstract="Background: Assessing the complex and multifaceted symptoms of patients with acute psychiatric disorders proves to be significantly challenging for clinicians. Moreover, the staff in acute psychiatric wards face high work intensity and risk of burnout, yet research on the introduction of digital technologies in this field remains limited. The combination of continuous and objective wearable sensor data acquired from patients with deep learning techniques holds the potential to overcome the limitations of traditional psychiatric assessments and support clinical decision-making. Objective: This study aimed to develop and validate wearable-based deep learning models to comprehensively predict patient symptoms across various acute psychiatric wards in South Korea. Methods: Participants diagnosed with schizophrenia and mood disorders were recruited from 4 wards across 3 hospitals and prospectively observed using wrist-worn wearable devices during their admission period. 
Trained raters conducted periodic clinical assessments using the Brief Psychiatric Rating Scale, Hamilton Anxiety Rating Scale, Montgomery-Asberg Depression Rating Scale, and Young Mania Rating Scale. Wearable devices collected patients' heart rate, accelerometer, and location data. Deep learning models were developed to predict psychiatric symptoms using 2 distinct approaches: single symptoms individually (Single) and multiple symptoms simultaneously via multitask learning (Multi). These models further addressed 2 problems: within-subject relative changes (Deterioration) and between-subject absolute severity (Score). Four configurations were consequently developed for each scale: Single-Deterioration, Single-Score, Multi-Deterioration, and Multi-Score. Data of participants recruited before May 1, 2024, underwent cross-validation, and the resulting fine-tuned models were then externally validated using data from the remaining participants. Results: Of the 244 enrolled participants, 191 (78.3\%; 3954 person-days) were included in the final analysis after applying the exclusion criteria. The demographic and clinical characteristics of participants, as well as the distribution of sensor data, showed considerable variations across wards and hospitals. Data of 139 participants were used for cross-validation, while data of 52 participants were used for external validation. The Single-Deterioration and Multi-Deterioration models achieved similar overall accuracy values of 0.75 in cross-validation and 0.73 in external validation. The Single-Score and Multi-Score models attained overall R{\texttwosuperior} values of 0.78 and 0.83 in cross-validation and 0.66 and 0.74 in external validation, respectively, with the Multi-Score model demonstrating superior performance. Conclusions: Deep learning models based on wearable sensor data effectively classified symptom deterioration and predicted symptom severity in participants in acute psychiatric wards. 
Despite lower computational costs, Multi models demonstrated equivalent or superior performance to Single models, suggesting that multitask learning is a promising approach for comprehensive symptom prediction. However, significant variations were observed across wards, which present a key challenge for developing clinical decision support systems in acute psychiatric wards. Future studies may benefit from recurring local validation or federated learning to address generalizability issues. ", doi="10.2196/65994", url="https://www.jmir.org/2024/1/e65994" }

% Gao et al., JMIR Form Res 2024 (e54335). Normalised from the publisher export:
%  - given name restored to "First M." order (export had "Junghaenel, U. Doerte") -- NOTE(review): confirm against the published byline;
%  - repeated keywords fields merged into one list (BibTeX keeps only the first of a duplicated field).
@Article{info:doi/10.2196/54335,
  author="Gao, Hongxin and Schneider, Stefan and Hernandez, Raymond and Harris, Jenny and Maupin, Danny and Junghaenel, Doerte U. and Kapteyn, Arie and Stone, Arthur and Zelinski, Elizabeth and Meijer, Erik and Lee, Pey-Jiuan and Orriens, Bart and Jin, Haomiao",
  title="Early Identification of Cognitive Impairment in Community Environments Through Modeling Subtle Inconsistencies in Questionnaire Responses: Machine Learning Model Development and Validation",
  journal="JMIR Form Res",
  year="2024",
  month="Nov",
  day="13",
  volume="8",
  pages="e54335",
  keywords="machine learning, artificial intelligence, cognitive impairments, surveys and questionnaires, community health services, public health, early identification, elder care, dementia",
  abstract="Background: The underdiagnosis of cognitive impairment hinders timely intervention of dementia. Health professionals working in the community play a critical role in the early detection of cognitive impairment, yet still face several challenges such as a lack of suitable tools, necessary training, and potential stigmatization. Objective: This study explored a novel application integrating psychometric methods with data science techniques to model subtle inconsistencies in questionnaire response data for early identification of cognitive impairment in community environments. Methods: This study analyzed questionnaire response data from participants aged 50 years and older in the Health and Retirement Study (waves 8-9, n=12,942). Predictors included low-quality response indices generated using the graded response model from four brief questionnaires (optimism, hopelessness, purpose in life, and life satisfaction) assessing aspects of overall well-being, a focus of health professionals in communities. The primary and supplemental predicted outcomes were current cognitive impairment derived from a validated criterion and dementia or mortality in the next ten years. Seven predictive models were trained, and the performance of these models was evaluated and compared. Results: The multilayer perceptron exhibited the best performance in predicting current cognitive impairment. In the selected four questionnaires, the area under curve values for identifying current cognitive impairment ranged from 0.63 to 0.66 and was improved to 0.71 to 0.74 when combining the low-quality response indices with age and gender for prediction. We set the threshold for assessing cognitive impairment risk in the tool based on the ratio of underdiagnosis costs to overdiagnosis costs, and a ratio of 4 was used as the default choice. Furthermore, the tool outperformed the efficiency of age or health-based screening strategies for identifying individuals at high risk for cognitive impairment, particularly in the 50- to 59-year and 60- to 69-year age groups. The tool is available on a portal website for the public to access freely. Conclusions: We developed a novel prediction tool that integrates psychometric methods with data science to facilitate ``passive or backend'' cognitive impairment assessments in community settings, aiming to promote early cognitive impairment detection. This tool simplifies the cognitive impairment assessment process, making it more adaptable and reducing burdens. Our approach also presents a new perspective for using questionnaire data: leveraging, rather than dismissing, low-quality data.",
  doi="10.2196/54335",
  url="https://formative.jmir.org/2024/1/e54335"
}

@Article{info:doi/10.2196/58572, author="Riad, Rachid and Denais, Martin and de Gennes, Marc and Lesage, Adrien and Oustric, Vincent and Cao, Nga Xuan and Mouchabac, St{\'e}phane and Bourla, Alexis", title="Automated Speech Analysis for Risk Detection of Depression, Anxiety, Insomnia, and Fatigue: Algorithm Development and Validation Study", journal="J Med Internet Res", year="2024", month="Oct", day="31", volume="26", pages="e58572", keywords="speech analysis", keywords="voice detection", keywords="voice analysis", keywords="speech biomarkers", keywords="speech-based systems", keywords="computer-aided diagnosis", keywords="mental health symptom detection", keywords="machine learning", keywords="mental health", keywords="fatigue", keywords="anxiety", keywords="depression", abstract="Background: While speech analysis holds promise for mental health assessment, research often focuses on single symptoms, despite symptom co-occurrences and interactions. In addition, predictive models in mental health do not properly assess the limitations of speech-based systems, such as uncertainty, or fairness for a safe clinical deployment. Objective: We investigated the predictive potential of mobile-collected speech data for detecting and estimating depression, anxiety, fatigue, and insomnia, focusing on other factors than mere accuracy, in the general population. 
Methods: We included 865 healthy adults and recorded their answers regarding their perceived mental and sleep states. We asked how they felt and if they had slept well lately. Clinically validated questionnaires measuring depression, anxiety, insomnia, and fatigue severity were also used. We developed a novel speech and machine learning pipeline involving voice activity detection, feature extraction, and model training. We automatically modeled speech with pretrained deep learning models that were pretrained on a large, open, and free database, and we selected the best one on the validation set. Based on the best speech modeling approach, clinical threshold detection, individual score prediction, model uncertainty estimation, and performance fairness across demographics (age, sex, and education) were evaluated. We used a train-validation-test split for all evaluations: to develop our models, select the best ones, and assess the generalizability of held-out data. Results: The best model was Whisper M with a max pooling and oversampling method. Our methods achieved good detection performance for all symptoms, depression (Patient Health Questionnaire-9: area under the curve [AUC]=0.76; F1-score=0.49 and Beck Depression Inventory: AUC=0.78; F1-score=0.65), anxiety (Generalized Anxiety Disorder 7-item scale: AUC=0.77; F1-score=0.50), insomnia (Athens Insomnia Scale: AUC=0.73; F1-score=0.62), and fatigue (Multidimensional Fatigue Inventory total score: AUC=0.68; F1-score=0.88). The system performed well when it needed to abstain from making predictions, as demonstrated by low abstention rates in depression detection with the Beck Depression Inventory and fatigue, with risk-coverage AUCs below 0.4. Individual symptom scores were accurately predicted (correlations were all significant with Pearson strengths between 0.31 and 0.49). 
Fairness analysis revealed that models were consistent for sex (average disparity ratio [DR] 0.86, SD 0.13), to a lesser extent for education level (average DR 0.47, SD 0.30), and worse for age groups (average DR 0.33, SD 0.30). Conclusions: This study demonstrates the potential of speech-based systems for multifaceted mental health assessment in the general population, not only for detecting clinical thresholds but also for estimating their severity. Addressing fairness and incorporating uncertainty estimation with selective classification are key contributions that can enhance the clinical utility and responsible implementation of such systems. ", doi="10.2196/58572", url="https://www.jmir.org/2024/1/e58572" } @Article{info:doi/10.2196/51269, author="Liu, Qimin and Ning, Emma and Ross, K. Mindy and Cladek, Andrea and Kabir, Sarah and Barve, Amruta and Kennelly, Ellyn and Hussain, Faraz and Duffecy, Jennifer and Langenecker, A. Scott and Nguyen, M. Theresa and Tulabandhula, Theja and Zulueta, John and Demos, P. Alexander and Leow, Alex and Ajilore, Olusola", title="Digital Phenotypes of Mobile Keyboard Backspace Rates and Their Associations With Symptoms of Mood Disorder: Algorithm Development and Validation", journal="J Med Internet Res", year="2024", month="Oct", day="29", volume="26", pages="e51269", keywords="keyboard typing", keywords="passive sensing", keywords="digital phenotyping", keywords="mood disorder", keywords="mixture model", keywords="phenotypes", keywords="mobile keyboard", keywords="smartphone", keywords="keyboard data", keywords="monitoring", keywords="clinical decision-making", keywords="depression", keywords="mania", keywords="mobile phone", abstract="Background: Passive sensing through smartphone keyboard data can be used to identify and monitor symptoms of mood disorders with low participant burden.
Behavioral phenotyping based on mobile keystroke data can aid in clinical decision-making and provide insights into the individual symptoms of mood disorders. Objective: This study aims to derive digital phenotypes based on smartphone keyboard backspace use among 128 community adults across 2948 observations using a Bayesian mixture model. Methods: Eligible study participants completed a virtual screening visit where all eligible participants were instructed to download the custom-built BiAffect smartphone keyboard (University of Illinois). The BiAffect keyboard unobtrusively captures keystroke dynamics. All eligible and consenting participants were instructed to use this keyboard exclusively for up to 4 weeks of the study in real life, and participants' compliance was checked at the 2 follow-up visits at week 2 and week 4. As part of the research protocol, every study participant underwent evaluations by a study psychiatrist during each visit. Results: We found that derived phenotypes were associated with not only the diagnoses and severity of depression and mania but also specific individual symptoms. Using a linear mixed-effects model with random intercepts accounting for the nested data structure from daily data, the backspace rates on the continuous scale did not differ between participants in the healthy control and in the mood disorders groups (P=.11). The 3-class model had mean backspace rates of 0.112, 0.180, and 0.268, respectively, with a SD of 0.048. In total, 3 classes, respectively, were estimated to comprise 37.5\% (n=47), 54.4\% (n=72), and 8.1\% (n=9) of the sample. We grouped individuals into Low, Medium, and High backspace rate groups. Individuals with unipolar mood disorder were predominantly in the Medium group (n=54), with some in the Low group (n=27) and a few in the High group (n=6). The Medium group, compared with the Low group, had significantly higher ratings of depression (b=2.32, P=.008). 
The High group was not associated with ratings of depression with (P=.88) or without (P=.27) adjustment for medication and diagnoses. The High group, compared with the Low group, was associated with both nonzero ratings (b=1.91, P=.02) and higher ratings of mania (b=1.46, P<.001). The High group, compared with the Low group, showed significantly higher odds of elevated mood (P=.03), motor activity (P=.04), and irritability (P<.05). Conclusions: This study demonstrates the promise of mobile typing kinematics in mood disorder research and practice. Monitoring a single mobile typing kinematic feature, that is, backspace rates, through passive sensing imposes a low burden on the participants. Based on real-life keystroke data, our derived digital phenotypes from this single feature can be useful for researchers and practitioners to distinguish between individuals with and those without mood disorder symptoms. ", doi="10.2196/51269", url="https://www.jmir.org/2024/1/e51269" } @Article{info:doi/10.2196/58357, author="Lee, Heather Younga and Zhang, Yingzhe and Kennedy, J. Chris and Mallard, T. Travis and Liu, Zhaowen and Vu, Linh Phuong and Feng, Anne Yen-Chen and Ge, Tian and Petukhova, V. Maria and Kessler, C. Ronald and Nock, K. Matthew and Smoller, W. 
Jordan", title="Enhancing Suicide Risk Prediction With Polygenic Scores in Psychiatric Emergency Settings: Prospective Study", journal="JMIR Bioinform Biotech", year="2024", month="Oct", day="23", volume="5", pages="e58357", keywords="polygenic risk score", keywords="suicide risk prediction", keywords="suicide attempt", keywords="predictive algorithms", keywords="genomics", keywords="genotypes", keywords="electronic health record", keywords="machine learning", abstract="Background: Despite growing interest in the clinical translation of polygenic risk scores (PRSs), it remains uncertain to what extent genomic information can enhance the prediction of psychiatric outcomes beyond the data collected during clinical visits alone. Objective: This study aimed to assess the clinical utility of incorporating PRSs into a suicide risk prediction model trained on electronic health records (EHRs) and patient-reported surveys among patients admitted to the emergency department. Methods: Study participants were recruited from the psychiatric emergency department at Massachusetts General Hospital. There were 333 adult patients of European ancestry who had high-quality genotype data available through their participation in the Mass General Brigham Biobank. Multiple neuropsychiatric PRSs were added to a previously validated suicide prediction model in a prospective cohort enrolled between February 4, 2015, and March 13, 2017. Data analysis was performed from July 11, 2022, to August 31, 2023. Suicide attempt was defined using diagnostic codes from longitudinal EHRs combined with 6-month follow-up surveys. The clinical risk score for suicide attempt was calculated from an ensemble model trained using an EHR-based suicide risk score and a brief survey, and it was subsequently used to define the baseline model. 
We generated PRSs for depression, bipolar disorder, schizophrenia, suicide attempt, and externalizing traits using a Bayesian polygenic scoring method for European ancestry participants. Model performance was evaluated using area under the receiver operator curve (AUC), area under the precision-recall curve, and positive predictive values. Results: Of the 333 patients (n=178, 53.5\% male; mean age 36.8, SD 13.6 years; n=333, 100\% non-Hispanic and n=324, 97.3\% self-reported White), 28 (8.4\%) had a suicide attempt within 6 months. Adding either the schizophrenia PRS or all PRSs to the baseline model resulted in the numerically highest discrimination (AUC 0.86, 95\% CI 0.73-0.99) compared to the baseline model (AUC 0.84, 95\% CI 0.70-0.98). However, the improvement in model performance was not statistically significant. Conclusions: In this study, incorporating genomic information into clinical prediction models for suicide attempt did not improve patient risk stratification. Larger studies that include more diverse participants are required to validate whether the inclusion of psychiatric PRSs in clinical prediction models can enhance the stratification of patients at risk of suicide attempts. ", doi="10.2196/58357", url="https://bioinform.jmir.org/2024/1/e58357", url="http://www.ncbi.nlm.nih.gov/pubmed/39442166" } @Article{info:doi/10.2196/53465, author="Toki, I.
Eugenia and Zakopoulou, Victoria and Tatsis, Giorgos and Pange, Jenny", title="Automated Detection of Neurodevelopmental Disorders Using Face-to-Face Mobile Technology Among Typically Developing Greek Children: Randomized Controlled Trial", journal="JMIR Form Res", year="2024", month="Oct", day="11", volume="8", pages="e53465", keywords="main principles", keywords="automated detection", keywords="neurodevelopmental disorders", keywords="principal component analysis", keywords="early screening", keywords="early intervention", keywords="detection", keywords="screening", keywords="assessment", keywords="digital tool", keywords="serious game", keywords="child", keywords="Greece", keywords="speech", keywords="psychomotor", keywords="cognitive", keywords="psychoemotional", keywords="hearing", keywords="machine learning", keywords="apps", keywords="predictions", keywords="prognosis", abstract="Background: Neurodevelopmental disorders (NDs) are characterized by heterogeneity, complexity, and interactions among multiple domains with long-lasting effects in adulthood. Early and accurate identification of children at risk for NDs is crucial for timely intervention, yet many cases remain undiagnosed, leading to missed opportunities for effective interventions. Digital tools can help clinicians assist and identify NDs. The concept of using serious games to enhance health care has gained attention among a growing group of scientists, entrepreneurs, and clinicians. Objective: This study aims to explore the core principles of automated mobile detection of NDs in typically developing Greek children, using a serious game developed within the SmartSpeech project, designed to evaluate multiple developmental domains through principal component analysis (PCA). Methods: A total of 229 typically developing children aged 4 to 12 years participated in the study. The recruitment process involved open calls through public and private health and educational institutions across Greece. 
Parents were thoroughly informed about the study's objectives and procedures, and written consent was obtained. Children engaged under the clinician's face-to-face supervision with the serious game ``Apsou,'' which assesses 18 developmental domains, including speech, language, psychomotor, cognitive, psychoemotional, and hearing abilities. Data from the children's interactions were analyzed using PCA to identify key components and underlying principles of ND detection. Results: A sample of 229 typically developing preschoolers and early school-aged children played the Apsou mobile serious game for automated detection of NDs. Performing a PCA, the findings identified 5 main components accounting for about 80\% of the data variability that potentially have significant prognostic implications for a safe diagnosis of NDs. Varimax rotation explained 61.44\% of the total variance. The results underscore key theoretical principles crucial for the automated detection of NDs. These principles encompass communication skills, speech and language development, vocal processing, cognitive skills and sensory functions, and visual-spatial skills. These components align with the theoretical principles of child development and provide a robust framework for automated ND detection. Conclusions: The study highlights the feasibility and effectiveness of using serious games for early ND detection in children. The identified principal components offer valuable insights into critical developmental domains, paving the way for the development of advanced machine learning applications to support highly accurate predictions and classifications for automated screening, diagnosis, prognosis, or intervention planning in ND clinical decision-making. Future research should focus on validating these findings across diverse populations integrating additional features such as biometric data and longitudinal tracking to enhance the accuracy and reliability of automated detection systems. 
Trial Registration: ClinicalTrials.gov NCT06633874; https://clinicaltrials.gov/study/NCT06633874 International Registered Report Identifier (IRRID): RR2-https://doi.org/10.3390/signals4020021 ", doi="10.2196/53465", url="https://formative.jmir.org/2024/1/e53465" } @Article{info:doi/10.2196/56574, author="Ridout, J. Samuel and Ridout, K. Kathryn and Lin, Y. Teresa and Campbell, I. Cynthia", title="Clinical Use of Mental Health Digital Therapeutics in a Large Health Care Delivery System: Retrospective Patient Cohort Study and Provider Survey", journal="JMIR Ment Health", year="2024", month="Oct", day="2", volume="11", pages="e56574", keywords="digital therapeutics", keywords="depression", keywords="anxiety", keywords="mental health", keywords="retrospective cohort", keywords="electronic health record", keywords="adults", keywords="survey", keywords="recommendation", keywords="mobile phone", abstract="Background: While the number of digital therapeutics (DTx) has proliferated, there is little real-world research on the characteristics of providers recommending DTx, their recommendation behaviors, or the characteristics of patients receiving recommendations in the clinical setting. Objective: The aim of this study was to characterize the clinical and demographic characteristics of patients receiving DTx recommendations and describe provider characteristics and behaviors regarding DTx. Methods: This retrospective cohort study used electronic health record data from a large, integrated health care delivery system. Demographic and clinical characteristics of adult patients recommended versus not recommended DTx by a mental health provider between May 2020 and December 2021 were examined. A cross-sectional survey of mental health providers providing these recommendations was conducted in December 2022 to assess the characteristics of providers and recommendation behaviors related to DTx. 
Parametric and nonparametric tests were used to examine statistical significance between groups. Results: Of 335,250 patients with a mental health appointment, 53,546 (16\%) received a DTx recommendation. Patients recommended to DTx were younger, were of Asian or Hispanic race or ethnicity, were female, were without medical comorbidities, and had commercial insurance compared to those without a DTx recommendation (P<.001). More patients receiving a DTx recommendation had anxiety or adjustment disorder diagnoses, but less had depression, bipolar, or psychotic disorder diagnoses (P<.001) versus matched controls not recommended to DTx. Overall, depression and anxiety symptom scores were lower in patients recommended to DTx compared to matched controls not receiving a recommendation, although female patients had a higher proportion of severe depression and anxiety scores compared to male patients. Provider survey results indicated a higher proportion of nonprescribers recommended DTx to patients compared to prescribers (P=.008). Of all providers, 29.4\% (45/153) reported using the suggested internal electronic health record--based tools (eg, smart text) to recommend DTx, and of providers recommending DTx resources to patients, 64.1\% (98/153) reported they follow up with patients to inquire on DTx benefits. Only 38.4\% (58/151) of respondents report recommending specific DTx modules, and of those, 58.6\% (34/58) report following up on the impact of these specific modules. Conclusions: DTx use in mental health was modest and varied by patient and provider characteristics. Providers do not appear to actively engage with these tools and integrate them into treatment plans. Providers, while expressing interest in potential benefits from DTx, may view DTx as a passive strategy to augment traditional treatment for select patients. ", doi="10.2196/56574", url="https://mental.jmir.org/2024/1/e56574" } @Article{info:doi/10.2196/57926, author="Prakash, Ravi and Dupre, E. 
Matthew and {\O}stbye, Truls and Xu, Hanzhang", title="Extracting Critical Information from Unstructured Clinicians' Notes Data to Identify Dementia Severity Using a Rule-Based Approach: Feasibility Study", journal="JMIR Aging", year="2024", month="Sep", day="24", volume="7", pages="e57926", keywords="electronic health record", keywords="EHR", keywords="electronic medical record", keywords="EMR", keywords="patient record", keywords="health record", keywords="personal health record", keywords="PHR", keywords="unstructured data", keywords="rule based analysis", keywords="artificial intelligence", keywords="AI", keywords="large language model", keywords="LLM", keywords="natural language processing", keywords="NLP", keywords="deep learning", keywords="Alzheimer's disease and related dementias", keywords="AD", keywords="ADRD", keywords="Alzheimer's disease", keywords="dementia", keywords="geriatric syndromes", abstract="Background: The severity of Alzheimer disease and related dementias (ADRD) is rarely documented in structured data fields in electronic health records (EHRs). Although this information is important for clinical monitoring and decision-making, it is often undocumented or ``hidden'' in unstructured text fields and not readily available for clinicians to act upon. Objective: We aimed to assess the feasibility and potential bias in using keywords and rule-based matching for obtaining information about the severity of ADRD from EHR data. Methods: We used EHR data from a large academic health care system that included patients with a primary discharge diagnosis of ADRD based on ICD-9 (International Classification of Diseases, Ninth Revision) and ICD-10 (International Statistical Classification of Diseases, Tenth Revision) codes between 2014 and 2019. We first assessed the presence of ADRD severity information and then the severity of ADRD in the EHR.
Clinicians' notes were used to determine the severity of ADRD based on two criteria: (1) scores from the Mini Mental State Examination and Montreal Cognitive Assessment and (2) explicit terms for ADRD severity (eg, ``mild dementia'' and ``advanced Alzheimer disease''). We compiled a list of common ADRD symptoms, cognitive test names, and disease severity terms, refining it iteratively based on previous literature and clinical expertise. Subsequently, we used rule-based matching in Python using standard open-source data analysis libraries to identify the context in which specific words or phrases were mentioned. We estimated the prevalence of documented ADRD severity and assessed the performance of our rule-based algorithm. Results: We included 9115 eligible patients with over 65,000 notes from the providers. Overall, 22.93\% (2090/9115) of patients were documented with mild ADRD, 20.87\% (1902/9115) were documented with moderate or severe ADRD, and 56.20\% (5123/9115) did not have any documentation of the severity of their ADRD. For the task of determining the presence of any ADRD severity information, our algorithm achieved an accuracy of >95\%, specificity of >95\%, sensitivity of >90\%, and an F1-score of >83\%. For the specific task of identifying the actual severity of ADRD, the algorithm performed well with an accuracy of >91\%, specificity of >80\%, sensitivity of >88\%, and F1-score of >92\%. Comparing patients with mild ADRD to those with more advanced ADRD, the latter group tended to contain older, more likely female, and Black patients, and having received their diagnoses in primary care or in-hospital settings. Relative to patients with undocumented ADRD severity, those with documented ADRD severity had a similar distribution in terms of sex, race, and rural or urban residence. Conclusions: Our study demonstrates the feasibility of using a rule-based matching algorithm to identify ADRD severity from unstructured EHR report data. 
However, it is essential to acknowledge potential biases arising from differences in documentation practices across various health care systems. ", doi="10.2196/57926", url="https://aging.jmir.org/2024/1/e57926", url="http://www.ncbi.nlm.nih.gov/pubmed/39316421" } @Article{info:doi/10.2196/54617, author="Shin, Daun and Kim, Hyoseung and Lee, Seunghwan and Cho, Younhee and Jung, Whanbo", title="Using Large Language Models to Detect Depression From User-Generated Diary Text Data as a Novel Approach in Digital Mental Health Screening: Instrument Validation Study", journal="J Med Internet Res", year="2024", month="Sep", day="18", volume="26", pages="e54617", keywords="depression", keywords="screening", keywords="artificial intelligence", keywords="digital health technology", keywords="text data", abstract="Background: Depressive disorders have substantial global implications, leading to various social consequences, including decreased occupational productivity and a high disability burden. Early detection and intervention for clinically significant depression have gained attention; however, the existing depression screening tools, such as the Center for Epidemiologic Studies Depression Scale, have limitations in objectivity and accuracy. Therefore, researchers are identifying objective indicators of depression, including image analysis, blood biomarkers, and ecological momentary assessments (EMAs). Among EMAs, user-generated text data, particularly from diary writing, have emerged as a clinically significant and analyzable source for detecting or diagnosing depression, leveraging advancements in large language models such as ChatGPT. Objective: We aimed to detect depression based on user-generated diary text through an emotional diary writing app using a large language model (LLM). We aimed to validate the value of the semistructured diary text data as an EMA data source. 
Methods: Participants were assessed for depression using the Patient Health Questionnaire and suicide risk was evaluated using the Beck Scale for Suicide Ideation before starting and after completing the 2-week diary writing period. The text data from the daily diaries were also used in the analysis. The performance of leading LLMs, such as ChatGPT with GPT-3.5 and GPT-4, was assessed with and without GPT-3.5 fine-tuning on the training data set. The model performance comparison involved the use of chain-of-thought and zero-shot prompting to analyze the text structure and content. Results: We used 428 diaries from 91 participants; GPT-3.5 fine-tuning demonstrated superior performance in depression detection, achieving an accuracy of 0.902 and a specificity of 0.955. However, the balanced accuracy was the highest (0.844) for GPT-3.5 without fine-tuning and prompt techniques; it displayed a recall of 0.929. Conclusions: Both GPT-3.5 and GPT-4.0 demonstrated relatively reasonable performance in recognizing the risk of depression based on diaries. Our findings highlight the potential clinical usefulness of user-generated text data for detecting depression. In addition to measurable indicators, such as step count and physical activity, future research should increasingly emphasize qualitative digital expression. ", doi="10.2196/54617", url="https://www.jmir.org/2024/1/e54617", url="http://www.ncbi.nlm.nih.gov/pubmed/39292502" } @Article{info:doi/10.2196/55126, author="Ding, Huitong and Lister, Adrian and Karjadi, Cody and Au, Rhoda and Lin, Honghuang and Bischoff, Brian and Hwang, H. 
Phillip", title="Detection of Mild Cognitive Impairment From Non-Semantic, Acoustic Voice Features: The Framingham Heart Study", journal="JMIR Aging", year="2024", month="Aug", day="22", volume="7", pages="e55126", keywords="early detection", keywords="Alzheimer disease and related dementias", keywords="mild cognitive impairment", keywords="digital voice", keywords="machine learning", keywords="smartphone", keywords="mobile phone", abstract="Background: With the aging global population and the rising burden of Alzheimer disease and related dementias (ADRDs), there is a growing focus on identifying mild cognitive impairment (MCI) to enable timely interventions that could potentially slow down the onset of clinical dementia. The production of speech by an individual is a cognitively complex task that engages various cognitive domains. The ease of audio data collection highlights the potential cost-effectiveness and noninvasive nature of using human speech as a tool for cognitive assessment. Objective: This study aimed to construct a machine learning pipeline that incorporates speaker diarization, feature extraction, feature selection, and classification to identify a set of acoustic features derived from voice recordings that exhibit strong MCI detection capability. Methods: The study included 100 MCI cases and 100 cognitively normal controls matched for age, sex, and education from the Framingham Heart Study. Participants' spoken responses on neuropsychological tests were recorded, and the recorded audio was processed to identify segments of each participant's voice from recordings that included voices of both testers and participants. A comprehensive set of 6385 acoustic features was then extracted from these voice segments using OpenSMILE and Praat software. Subsequently, a random forest model was constructed to classify cognitive status using the features that exhibited significant differences between the MCI and cognitively normal groups. 
The MCI detection performance of various audio lengths was further examined. Results: An optimal subset of 29 features was identified that resulted in an area under the receiver operating characteristic curve of 0.87, with a 95\% CI of 0.81-0.94. The most important acoustic feature for MCI classification was the number of filled pauses (importance score=0.09, P=3.10E--08). There was no substantial difference in the performance of the model trained on the acoustic features derived from different lengths of voice recordings. Conclusions: This study showcases the potential of monitoring changes to nonsemantic and acoustic features of speech as a way of early ADRD detection and motivates future opportunities for using human speech as a measure of brain health. ", doi="10.2196/55126", url="https://aging.jmir.org/2024/1/e55126" } @Article{info:doi/10.2196/53714, author="Razavi, Moein and Ziyadidegan, Samira and Mahmoudzadeh, Ahmadreza and Kazeminasab, Saber and Baharlouei, Elaheh and Janfaza, Vahid and Jahromi, Reza and Sasangohar, Farzan", title="Machine Learning, Deep Learning, and Data Preprocessing Techniques for Detecting, Predicting, and Monitoring Stress and Stress-Related Mental Disorders: Scoping Review", journal="JMIR Ment Health", year="2024", month="Aug", day="21", volume="11", pages="e53714", keywords="machine learning", keywords="deep learning", keywords="data preprocessing", keywords="stress detection", keywords="stress prediction", keywords="stress monitoring", keywords="mental disorders", abstract="Background: Mental stress and its consequent mental health disorders (MDs) constitute a significant public health issue. With the advent of machine learning (ML), there is potential to harness computational techniques for better understanding and addressing mental stress and MDs. 
This comprehensive review seeks to elucidate the current ML methodologies used in this domain to pave the way for enhanced detection, prediction, and analysis of mental stress and its subsequent MDs. Objective: This review aims to investigate the scope of ML methodologies used in the detection, prediction, and analysis of mental stress and its consequent MDs. Methods: Using a rigorous scoping review process with PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) guidelines, this investigation delves into the latest ML algorithms, preprocessing techniques, and data types used in the context of stress and stress-related MDs. Results: A total of 98 peer-reviewed publications were examined for this review. The findings highlight that support vector machine, neural network, and random forest models consistently exhibited superior accuracy and robustness among all ML algorithms examined. Physiological parameters such as heart rate measurements and skin response are prevalently used as stress predictors due to their rich explanatory information concerning stress and stress-related MDs, as well as the relative ease of data acquisition. The application of dimensionality reduction techniques, including mappings, feature selection, filtering, and noise reduction, is frequently observed as a crucial step preceding the training of ML algorithms. Conclusions: The synthesis of this review identified significant research gaps and outlines future directions for the field. These encompass areas such as model interpretability, model personalization, the incorporation of naturalistic settings, and real-time processing capabilities for the detection and prediction of stress and stress-related MDs.
", doi="10.2196/53714", url="https://mental.jmir.org/2024/1/e53714" } @Article{info:doi/10.2196/49794, author="Greene, Barry and Tobyne, Sean and Jannati, Ali and McManus, Killian and Gomes Osman, Joyce and Banks, Russell and Kher, Ranjit and Showalter, John and Bates, David and Pascual-Leone, Alvaro", title="The Dual Task Ball Balancing Test and Its Association With Cognitive Function: Algorithm Development and Validation", journal="J Med Internet Res", year="2024", month="Aug", day="19", volume="26", pages="e49794", keywords="cognitive function", keywords="dual task", keywords="inertial sensors", keywords="mHealth", keywords="tablet", keywords="MCI", keywords="Alzheimer", keywords="dementia", keywords="motor", keywords="older adults", keywords="cognitive impairment", keywords="balance", abstract="Background: Dual task paradigms are thought to offer a quantitative means to assess cognitive reserve and the brain's capacity to allocate resources in the face of competing cognitive demands. The most common dual task paradigms examine the interplay between gait or balance control and cognitive function. However, gait and balance tasks can be physically challenging for older adults and may pose a risk of falls. Objective: We introduce a novel, digital dual-task assessment that combines a motor-control task (the ``ball balancing'' test), which challenges an individual to maintain a virtual ball within a designated zone, with a concurrent cognitive task (the backward digit span task [BDST]). Methods: The task was administered on a touchscreen tablet, performance was measured using the inertial sensors embedded in the tablet, conducted under both single- and dual-task conditions. The clinical use of the task was evaluated on a sample of 375 older adult participants (n=210 female; aged 73.0, SD 6.5 years).
Results: All older adults, including those with mild cognitive impairment (MCI) and Alzheimer disease--related dementia (ADRD), and those with poor balance and gait problems due to diabetes, osteoarthritis, peripheral neuropathy, and other causes, were able to complete the task comfortably and safely while seated. As expected, task performance significantly decreased under dual task conditions compared to single task conditions. We show that performance was significantly associated with cognitive impairment; significant differences were found among healthy participants, those with MCI, and those with ADRD. Task results were significantly associated with functional impairment, independent of diagnosis, degree of cognitive impairment (as indicated by the Mini Mental State Examination [MMSE] score), and age. Finally, we found that cognitive status could be classified with >70\% accuracy using a range of classifier models trained on 3 different cognitive function outcome variables (consensus clinical judgment, Rey Auditory Verbal Learning Test [RAVLT], and MMSE). Conclusions: Our results suggest that the dual task ball balancing test could be used as a digital cognitive assessment of cognitive reserve. The portability, simplicity, and intuitiveness of the task suggest that it may be suitable for unsupervised home assessment of cognitive function. ", doi="10.2196/49794", url="https://www.jmir.org/2024/1/e49794", url="http://www.ncbi.nlm.nih.gov/pubmed/39158963" } @Article{info:doi/10.2196/57038, author="Campos, Susana and Nu{\~n}ez, Daniel and P{\'e}rez, Carola J. 
and Robinson, Jo", title="Characterization of Psychopathology in Latin American Adolescents Using a Web-Based Screening Tool: Cross-Sectional Study", journal="JMIR Form Res", year="2024", month="Aug", day="8", volume="8", pages="e57038", keywords="web-based screening", keywords="adolescents", keywords="psychopathology", keywords="suicidal ideation", keywords="early detection", keywords="detection", keywords="screening", keywords="teens", keywords="youths", keywords="suicide", keywords="mental health", keywords="screening tool", keywords="Latin American", keywords="Latino", keywords="psychiatric", keywords="psychiatric symptoms", keywords="psychological risk", abstract="Background: Mental health problems and suicide ideation are common in adolescents. Early detection of these issues could prevent the escalation of mental health--related symptoms in the long term. Moreover, characterizing different profiles of prevalent symptoms in conjunction with emotional regulation strategies could guide the design of specific interventions. The use of web-based screening (WBS) tools has been regarded as a suitable strategy to timely detect symptomatology while improving the appeal, cost, timeliness, and reach of detection in young populations. However, the evidence regarding the accuracy of these approaches is not fully conclusive. Objective: The study aims (1) to examine the capability of a WBS to identify adolescents with psychiatric symptoms and suicidality and (2) to characterize the mental health profiles of a large sample of adolescents using WBS. Methods: A total of 1599 Latin American Spanish-speaking adolescents (mean age 15.56, SD 1.34 years), consisting of 47.3\% (n=753) female, 98.5\% Chilean (n=1570), and 1.5\% Venezuelan (n=24) participants, responded to a mental health WBS. A randomized subsample of participants also responded to the Mini International Neuropsychiatric Interview for Children and Adolescents (MINI-KID). 
McNemar $\chi^2$ and receiver-operating characteristic curves tested the detection accuracy of WBS contrasted with the MINI-KID. Latent profile analyses explored the symptomatic and emotional regulation profiles of participants. Results: Both measures showed an adequate level of agreement (area under the curve per symptom domain ranging from 0.70 to 0.89); however, WBS yielded a higher prevalence than MINI-KID for all psychiatric symptoms, except suicide ideation and depression. Latent profile analyses yielded 4 profiles---one of them presented elevated psychopathological symptoms, constituting 11\% of the sample (n=175). Rumination (odds ratio [OR] 130.15, 95\% CI 51.75-439.89; P<.001), entrapment (OR 96.35, 95\% CI 29.21-317.79; P<.001), and defeat (OR 156.79, 95\% CI 50.45-487.23; P<.001) contributed significantly to the prediction of latent profile memberships, while cognitive reappraisal did not contribute to the prediction of any latent profile memberships, and expressive suppression was only associated to profile-2 membership. Conclusions: WBS is acceptable for the timely detection of adolescents at risk of mental health conditions. Findings from the symptomatic and emotional regulation profiles highlight the need for comprehensive assessments and differential interventions. ", doi="10.2196/57038", url="https://formative.jmir.org/2024/1/e57038" } @Article{info:doi/10.2196/59826, author="Ortiz, Abigail and Mulsant, H. 
Benoit", title="Beyond Step Count: Are We Ready to Use Digital Phenotyping to Make Actionable Individual Predictions in Psychiatry?", journal="J Med Internet Res", year="2024", month="Aug", day="5", volume="26", pages="e59826", keywords="digital phenotype", keywords="digital phenotyping", keywords="prediction", keywords="predictions", keywords="mental health", keywords="mental illness", keywords="mental illnesses", keywords="mental disorder", keywords="mental disorders", keywords="US National Institute of Mental Health", keywords="NIMH", keywords="psychiatry", keywords="psychiatrist", keywords="psychiatrists", doi="10.2196/59826", url="https://www.jmir.org/2024/1/e59826" } @Article{info:doi/10.2196/54577, author="Grazioli, Silvia and Crippa, Alessandro and Buo, Noemi and Busti Ceccarelli, Silvia and Molteni, Massimo and Nobile, Maria and Salandi, Antonio and Trabattoni, Sara and Caselli, Gabriele and Colombo, Paola", title="Use of Machine Learning Models to Differentiate Neurodevelopment Conditions Through Digitally Collected Data: Cross-Sectional Questionnaire Study", journal="JMIR Form Res", year="2024", month="Jul", day="29", volume="8", pages="e54577", keywords="digital-aided clinical assessment", keywords="machine learning", keywords="random forest", keywords="logistic regression", keywords="computational psychometrics", keywords="telemedicine", keywords="neurodevelopmental conditions", keywords="parent-report questionnaires", keywords="attention-deficit/hyperactivity disorder", keywords="autism spectrum disorder", keywords="ASD", keywords="autism", keywords="autistic", keywords="attention deficit", keywords="hyperactivity", keywords="classification", abstract="Background: Diagnosis of child and adolescent psychopathologies involves a multifaceted approach, integrating clinical observations, behavioral assessments, medical history, cognitive testing, and familial context information. 
Digital technologies, especially internet-based platforms for administering caregiver-rated questionnaires, are increasingly used in this field, particularly during the screening phase. The ascent of digital platforms for data collection has propelled advanced psychopathology classification methods such as supervised machine learning (ML) into the forefront of both research and clinical environments. This shift, recently called psycho-informatics, has been facilitated by gradually incorporating computational devices into clinical workflows. However, an actual integration between telemedicine and the ML approach has yet to be fulfilled. Objective: Under these premises, exploring the potential of ML applications for analyzing digitally collected data may have significant implications for supporting the clinical practice of diagnosing early psychopathology. The purpose of this study was, therefore, to exploit ML models for the classification of attention-deficit/hyperactivity disorder (ADHD) and autism spectrum disorder (ASD) using internet-based parent-reported socio-anamnestic data, aiming at obtaining accurate predictive models for new help-seeking families. Methods: In this retrospective, single-center observational study, socio-anamnestic data were collected from 1688 children and adolescents referred for suspected neurodevelopmental conditions. The data included sociodemographic, clinical, environmental, and developmental factors, collected remotely through the first Italian internet-based screening tool for neurodevelopmental disorders, the Medea Information and Clinical Assessment On-Line (MedicalBIT). Random forest (RF), decision tree, and logistic regression models were developed and evaluated using classification accuracy, sensitivity, specificity, and importance of independent variables. Results: The RF model demonstrated robust accuracy, achieving 84\% (95\% CI 82-85; P<.001) for ADHD and 86\% (95\% CI 84-87; P<.001) for ASD classifications. 
Sensitivities were also high, with 93\% for ADHD and 95\% for ASD. In contrast, the DT and LR models exhibited lower accuracy (DT 74\%, 95\% CI 71-77; P<.001 for ADHD; DT 79\%, 95\% CI 77-82; P<.001 for ASD; LR 61\%, 95\% CI 57-64; P<.001 for ADHD; LR 63\%, 95\% CI 60-67; P<.001 for ASD) and sensitivities (DT: 82\% for ADHD and 88\% for ASD; LR: 62\% for ADHD and 68\% for ASD). The independent variables considered for classification differed in importance between the 2 models, reflecting the distinct characteristics of the 3 ML approaches. Conclusions: This study highlights the potential of ML models, particularly RF, in enhancing the diagnostic process of child and adolescent psychopathology. Altogether, the current findings underscore the significance of leveraging digital platforms and computational techniques in the diagnostic process. While interpretability remains crucial, the developed approach might provide valuable screening tools for clinicians, highlighting the significance of embedding computational techniques in the diagnostic process. ", doi="10.2196/54577", url="https://formative.jmir.org/2024/1/e54577", url="http://www.ncbi.nlm.nih.gov/pubmed/39073858" } @Article{info:doi/10.2196/38413, author="Omisore, Mumini Olatunji and Odenigbo, Ifeanyi and Orji, Joseph and Beltran, Hernandez Amelia Itzel and Meier, Sandra and Baghaei, Nilufar and Orji, Rita", title="Extended Reality for Mental Health Evaluation: Scoping Review", journal="JMIR Serious Games", year="2024", month="Jul", day="24", volume="12", pages="e38413", keywords="extended reality", keywords="mental disorder", keywords="depression", keywords="anxiety", keywords="exposure therapy", abstract="Background: Mental health disorders are the leading cause of health-related problems worldwide. It is projected that mental health disorders will be the leading cause of morbidity among adults as the incidence rates of anxiety and depression grow worldwide. 
Recently, ``extended reality'' (XR), a general term covering virtual reality (VR), augmented reality (AR), and mixed reality (MR), is paving the way for the delivery of mental health care. Objective: We aimed to investigate the adoption and implementation of XR technology used in interventions for mental disorders and to provide statistical analyses of the design, usage, and effectiveness of XR technology for mental health interventions with a worldwide demographic focus. Methods: In this paper, we conducted a scoping review of the development and application of XR in the area of mental disorders. We performed a database search to identify relevant studies indexed in Google Scholar, PubMed, and the ACM Digital Library. A search period between August 2016 and December 2023 was defined to select papers related to the usage of VR, AR, and MR in a mental health context. The database search was performed with predefined queries, and a total of 831 papers were identified. Ten papers were identified through professional recommendation. Inclusion and exclusion criteria were designed and applied to ensure that only relevant studies were included in the literature review. Results: We identified a total of 85 studies from 27 countries worldwide that used different types of VR, AR, and MR techniques for managing 14 types of mental disorders. By performing data analysis, we found that most of the studies focused on high-income countries, such as the United States (n=14, 16.47\%) and Germany (n=12, 14.12\%). None of the studies were for African countries. The majority of papers reported that XR techniques lead to a significant reduction in symptoms of anxiety or depression. The majority of studies were published in 2021 (n=26, 30.59\%). This could indicate that mental disorder intervention received higher attention when COVID-19 emerged. 
Most studies (n=65, 76.47\%) focused on a population in the age range of 18-65 years, while few studies (n=2, 3.35\%) focused on teenagers (ie, subjects in the age range of 10-19 years). In addition, more studies were conducted experimentally (n=67, 78.82\%) rather than by using analytical and modeling approaches (n=8, 9.41\%). This shows that there is a rapid development of XR technology for mental health care. Furthermore, these studies showed that XR technology can effectively be used for evaluating mental disorders in a similar or better way that conventional approaches. Conclusions: In this scoping review, we studied the adoption and implementation of XR technology for mental disorder care. Our review shows that XR treatment yields high patient satisfaction, and follow-up assessments show significant improvement with large effect sizes. Moreover, the studies adopted unique designs that were set up to record and analyze the symptoms reported by their participants. This review may aid future research and development of various XR mechanisms for differentiated mental disorder procedures. ", doi="10.2196/38413", url="https://games.jmir.org/2024/1/e38413" } @Article{info:doi/10.2196/52101, author="Matson, E. Theresa and Lee, K. Amy and Oliver, Malia and Bradley, A. Katharine and Hallgren, A. 
Kevin", title="Equivalence of Alcohol Use Disorder Symptom Assessments in Routine Clinical Care When Completed Remotely via Online Patient Portals Versus In Clinic via Paper Questionnaires: Psychometric Evaluation", journal="J Med Internet Res", year="2024", month="Jul", day="22", volume="26", pages="e52101", keywords="alcohol", keywords="alcohol use disorder", keywords="assessment", keywords="symptom checklist", keywords="electronic health record", keywords="patient portal", keywords="item response theory", keywords="differential item functioning", keywords="alcohol use", keywords="patient portals", keywords="in-clinic", keywords="psychometric evaluation", keywords="alcoholism", keywords="cross-sectional", keywords="United States", abstract="Background: The National Institute on Alcohol Abuse and Alcoholism (NIAAA) recommends the paper-based or computerized Alcohol Symptom Checklist to assess alcohol use disorder (AUD) symptoms in routine care when patients report high-risk drinking. However, it is unknown whether Alcohol Symptom Checklist response characteristics differ when it is administered online (eg, remotely via an online electronic health record [EHR] patient portal before an appointment) versus in clinic (eg, on paper after appointment check-in). Objective: This study evaluated the psychometric performance of the Alcohol Symptom Checklist when completed online versus in clinic during routine clinical care. Methods: This cross-sectional, psychometric study obtained EHR data from the Alcohol Symptom Checklist completed by adult patients from an integrated health system in Washington state. The sample included patients who had a primary care visit in 2021 at 1 of 32 primary care practices, were due for annual behavioral health screening, and reported high-risk drinking on the behavioral health screen (Alcohol Use Disorder Identification Test--Consumption score $\geq$7). 
After screening, patients with high-risk drinking were typically asked to complete the Alcohol Symptom Checklist---an 11-item questionnaire on which patients self-report whether they had experienced each of the 11 AUD criteria listed in the Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition (DSM-5) over a past-year timeframe. Patients could complete the Alcohol Symptom Checklist online (eg, on a computer, smartphone, or tablet from any location) or in clinic (eg, on paper as part of the rooming process at clinical appointments). We examined sample and measurement characteristics and conducted differential item functioning analyses using item response theory to examine measurement consistency across these 2 assessment modalities. Results: Among 3243 patients meeting eligibility criteria for this secondary analysis (2313/3243, 71\% male; 2271/3243, 70\% White; and 2014/3243, 62\% non-Hispanic), 1640 (51\%) completed the Alcohol Symptom Checklist online while 1603 (49\%) completed it in clinic. Approximately 46\% (752/1640) and 48\% (764/1603) reported $\geq$2 AUD criteria (the threshold for AUD diagnosis) online and in clinic (P=.37), respectively. A small degree of differential item functioning was observed for 4 of 11 items. This differential item functioning produced only minimal impact on total scores used clinically to assess AUD severity, affecting total criteria count by a maximum of 0.13 criteria (on a scale ranging from 0 to 11). Conclusions: Completing the Alcohol Symptom Checklist online, typically prior to patient check-in, performed similarly to an in-clinic modality typically administered on paper by a medical assistant at the time of the appointment. Findings have implications for using online AUD symptom assessments to streamline workflows, reduce staff burden, reduce stigma, and potentially assess patients who do not receive in-person care. Whether modality of DSM-5 assessment of AUD differentially impacts treatment is unknown. 
", doi="10.2196/52101", url="https://www.jmir.org/2024/1/e52101" } @Article{info:doi/10.2196/59794, author="Jaiswal, Aditi and Shah, Aekta and Harjadi, Christopher and Windgassen, Erik and Washington, Peter", title="Ethics of the Use of Social Media as Training Data for AI Models Used for Digital Phenotyping", journal="JMIR Form Res", year="2024", month="Jul", day="17", volume="8", pages="e59794", keywords="social media analytics", keywords="machine learning", keywords="ethics", keywords="research ethics", keywords="consent", keywords="scientific integrity", doi="10.2196/59794", url="https://formative.jmir.org/2024/1/e59794" } @Article{info:doi/10.2196/59349, author="Jaiswal, Aditi and Shah, Aekta and Harjadi, Christopher and Windgassen, Erik and Washington, Peter", title="Addendum: Using \#ActuallyAutistic on Twitter for Precision Diagnosis of Autism Spectrum Disorder: Machine Learning Study", journal="JMIR Form Res", year="2024", month="Jul", day="17", volume="8", pages="e59349", doi="10.2196/59349", url="https://formative.jmir.org/2024/1/e59349" } @Article{info:doi/10.2196/48777, author="Li, Aoyu and Li, Jingwen and Chai, Jiali and Wu, Wei and Chaudhary, Suamn and Zhao, Juanjuan and Qiang, Yan", title="Detection of Mild Cognitive Impairment Through Hand Motor Function Under Digital Cognitive Test: Mixed Methods Study", journal="JMIR Mhealth Uhealth", year="2024", month="Jun", day="26", volume="12", pages="e48777", keywords="mild cognitive impairment", keywords="movement kinetics", keywords="digital cognitive test", keywords="dual task", keywords="mobile phone", abstract="Background: Early detection of cognitive impairment or dementia is essential to reduce the incidence of severe neurodegenerative diseases. However, currently available diagnostic tools for detecting mild cognitive impairment (MCI) or dementia are time-consuming, expensive, or not widely accessible. Hence, exploring more effective methods to assist clinicians in detecting MCI is necessary. 
Objective: In this study, we aimed to explore the feasibility and efficiency of assessing MCI through movement kinetics under tablet-based ``drawing and dragging'' tasks. Methods: We iteratively designed ``drawing and dragging'' tasks by conducting symposiums, programming, and interviews with stakeholders (neurologists, nurses, engineers, patients with MCI, healthy older adults, and caregivers). Subsequently, stroke patterns and movement kinetics were evaluated in healthy control and MCI groups by comparing 5 categories of features related to hand motor function (ie, time, stroke, frequency, score, and sequence). Finally, user experience with the overall cognitive screening system was investigated using structured questionnaires and unstructured interviews, and their suggestions were recorded. Results: The ``drawing and dragging'' tasks can detect MCI effectively, with an average accuracy of 85\% (SD 2\%). Using statistical comparison of movement kinetics, we discovered that the time- and score-based features are the most effective among all the features. Specifically, compared with the healthy control group, the MCI group showed a significant increase in the time they took for the hand to switch from one stroke to the next, with longer drawing times, slow dragging, and lower scores. In addition, patients with MCI had poorer decision-making strategies and visual perception of drawing sequence features, as evidenced by adding auxiliary information and losing more local details in the drawing. Feedback from user experience indicates that our system is user-friendly and facilitates screening for deficits in self-perception. Conclusions: The tablet-based MCI detection system quantitatively assesses hand motor function in older adults and further elucidates the cognitive and behavioral decline phenomenon in patients with MCI. 
This innovative approach serves to identify and measure digital biomarkers associated with MCI or Alzheimer dementia, enabling the monitoring of changes in patients' executive function and visual perceptual abilities as the disease advances. ", doi="10.2196/48777", url="https://mhealth.jmir.org/2024/1/e48777" } @Article{info:doi/10.2196/53162, author="Gonz{\'a}lez-Colom, Rub{\`e}n and Mitra, Kangkana and Vela, Emili and Gezsi, Andras and Paajanen, Teemu and G{\'a}l, Zs{\'o}fia and Hullam, Gabor and M{\"a}kinen, Hannu and Nagy, Tamas and Kuokkanen, Mikko and Piera-Jim{\'e}nez, Jordi and Roca, Josep and Antal, Peter and Juhasz, Gabriella and Cano, Isaac", title="Multicentric Assessment of a Multimorbidity-Adjusted Disability Score to Stratify Depression-Related Risks Using Temporal Disease Maps: Instrument Validation Study", journal="J Med Internet Res", year="2024", month="Jun", day="24", volume="26", pages="e53162", keywords="health risk assessment", keywords="multimorbidity", keywords="disease trajectories", keywords="major depressive disorder", abstract="Background: Comprehensive management of multimorbidity can significantly benefit from advanced health risk assessment tools that facilitate value-based interventions, allowing for the assessment and prediction of disease progression. Our study proposes a novel methodology, the Multimorbidity-Adjusted Disability Score (MADS), which integrates disease trajectory methodologies with advanced techniques for assessing interdependencies among concurrent diseases. This approach is designed to better assess the clinical burden of clusters of interrelated diseases and enhance our ability to anticipate disease progression, thereby potentially informing targeted preventive care interventions. 
Objective: This study aims to evaluate the effectiveness of the MADS in stratifying patients into clinically relevant risk groups based on their multimorbidity profiles, which accurately reflect their clinical complexity and the probabilities of developing new associated disease conditions. Methods: In a retrospective multicentric cohort study, we developed the MADS by analyzing disease trajectories and applying Bayesian statistics to determine disease-disease probabilities combined with well-established disability weights. We used major depressive disorder (MDD) as a primary case study for this evaluation. We stratified patients into different risk levels corresponding to different percentiles of MADS distribution. We statistically assessed the association of MADS risk strata with mortality, health care resource use, and disease progression across 1 million individuals from Spain, the United Kingdom, and Finland. Results: The results revealed significantly different distributions of the assessed outcomes across the MADS risk tiers, including mortality rates; primary care visits; specialized care outpatient consultations; visits in mental health specialized centers; emergency room visits; hospitalizations; pharmacological and nonpharmacological expenditures; and dispensation of antipsychotics, anxiolytics, sedatives, and antidepressants (P<.001 in all cases). Moreover, the results of the pairwise comparisons between adjacent risk tiers illustrate a substantial and gradual pattern of increased mortality rate, heightened health care use, increased health care expenditures, and a raised pharmacological burden as individuals progress from lower MADS risk tiers to higher-risk tiers. The analysis also revealed an augmented risk of multimorbidity progression within the high-risk groups, aligned with a higher incidence of new onsets of MDD-related diseases. Conclusions: The MADS seems to be a promising approach for predicting health risks associated with multimorbidity. 
It might complement current risk assessment state-of-the-art tools by providing valuable insights for tailored epidemiological impact analyses of clusters of interrelated diseases and by accurately assessing multimorbidity progression risks. This study paves the way for innovative digital developments to support advanced health risk assessment strategies. Further validation is required to generalize its use beyond the initial case study of MDD. ", doi="10.2196/53162", url="https://www.jmir.org/2024/1/e53162", url="http://www.ncbi.nlm.nih.gov/pubmed/38913991" } @Article{info:doi/10.2196/58565, author="Denis, Fabrice and Le Goff, Florian and Desbois, Madhu and Gepner, Agnes and Feliciano, Guillaume and Silber, Denise and Zeitoun, Jean-David and Assuied, Peretz Guedalia", title="Early Detection of 5 Neurodevelopmental Disorders of Children and Prevention of Postnatal Depression With a Mobile Health App: Observational Cross-Sectional Study", journal="JMIR Public Health Surveill", year="2024", month="Jun", day="18", volume="10", pages="e58565", keywords="mobile phone", keywords="pediatric", keywords="infant", keywords="baby", keywords="neonate", keywords="newborn", keywords="toddler", keywords="child", keywords="early detection", keywords="app", keywords="application", keywords="screening", keywords="algorithm", keywords="NDD", keywords="neurodevelopmental disorder", keywords="autism", keywords="ASD", keywords="autism spectrum disorder", keywords="attention deficit/hyperactivity disorder", keywords="ADHD", keywords="attention deficit", keywords="PND", keywords="postnatal depression", keywords="mHealth", keywords="mobile health", keywords="real-world study", keywords="smartphone", keywords="dyspraxia", keywords="delayed", keywords="language", keywords="dyslexia", keywords="incidence", keywords="prevalence", abstract="Background: Delay in the diagnosis of neurodevelopmental disorders (NDDs) in toddlers and postnatal depression (PND) is a major public health issue. 
In both cases, early intervention is crucial but too rarely implemented in practice. Objective: Our goal was to determine if a dedicated mobile app can improve screening of 5 NDDs (autism spectrum disorder [ASD], language delay, dyspraxia, dyslexia, and attention-deficit/hyperactivity disorder [ADHD]) and reduce PND incidence. Methods: We performed an observational, cross-sectional, data-based study in a population of young parents in France with at least 1 child aged <10 years at the time of inclusion and regularly using Malo, an ``all-in-one'' multidomain digital health record electronic patient-reported outcome (PRO) app for smartphones. We included the first 50,000 users matching the criteria and agreeing to participate between May 1, 2022, and February 8, 2024. Parents received periodic questionnaires assessing skills in neurodevelopment domains via the app. Mothers accessed a support program to prevent PND and were requested to answer regular PND questionnaires. When any PROs matched predefined criteria, an in-app recommendation was sent to book an appointment with a family physician or pediatrician. The main outcomes were the median age of the infant at the time of notification for possible NDD and the incidence of PND detection after childbirth. One secondary outcome was the relevance of the NDD notification by consultation as assessed by health professionals. Results: Among 55,618 children median age 4 months (IQR 9), 439 (0.8\%) had at least 1 disorder for which consultation was critically necessary. The median ages of notification for probable ASD, language delay, dyspraxia, dyslexia, and ADHD were 32.5 (IQR 12.8), 16 (IQR 13), 36 (IQR 22.5), 80 (IQR 5), and 61 (IQR 15.5) months, respectively. The rate of probable ADHD, ASD, dyslexia, language delay, and dyspraxia in the population of children of the age included between the detection limits of each alert was 1.48\%, 0.21\%, 1.52\%, 0.91\%, and 0.37\%, respectively. 
Sensitivity of alert notifications for suspected NDDs as assessed by the physicians was 78.6\% and specificity was 98.2\%. Among 8243 mothers who completed a PND questionnaire, highly probable PND was detected in 938 (11.4\%), corresponding to a reduction of --31\% versus our previous study without a support program. Suspected PND was detected a median 96 days (IQR 86) after childbirth. Among 130 users who filled in the satisfaction survey, 99.2\% (129/130) found the app easy to use and 70\% (91/130) reported that the app improved follow-up of their child. The app was rated 4.8/5 on Apple's App Store. Conclusions: Algorithm-based early alerts suggesting NDDs were highly specific with good sensitivity as assessed by real-life practitioners. Early detection of 5 NDDs and PNDs was efficient and led to a possible 31\% reduction in PND incidence. Trial Registration: ClinicalTrials.gov NCT06301087; https://www.clinicaltrials.gov/study/NCT06301087 ", doi="10.2196/58565", url="https://publichealth.jmir.org/2024/1/e58565", url="http://www.ncbi.nlm.nih.gov/pubmed/38888952" } @Article{info:doi/10.2196/56668, author="Morita, Kentaro and Miura, Kenichiro and Toyomaki, Atsuhito and Makinodan, Manabu and Ohi, Kazutaka and Hashimoto, Naoki and Yasuda, Yuka and Mitsudo, Takako and Higuchi, Fumihiro and Numata, Shusuke and Yamada, Akiko and Aoki, Yohei and Honda, Hiromitsu and Mizui, Ryo and Honda, Masato and Fujikane, Daisuke and Matsumoto, Junya and Hasegawa, Naomi and Ito, Satsuki and Akiyama, Hisashi and Onitsuka, Toshiaki and Satomura, Yoshihiro and Kasai, Kiyoto and Hashimoto, Ryota", title="Tablet-Based Cognitive and Eye Movement Measures as Accessible Tools for Schizophrenia Assessment: Multisite Usability Study", journal="JMIR Ment Health", year="2024", month="May", day="30", volume="11", pages="e56668", keywords="schizophrenia", keywords="cognitive function", keywords="eye movement", keywords="diagnostic biomarkers", keywords="digital health tools", abstract="Background: 
Schizophrenia is a complex mental disorder characterized by significant cognitive and neurobiological alterations. Impairments in cognitive function and eye movement have been known to be promising biomarkers for schizophrenia. However, cognitive assessment methods require specialized expertise. To date, data on simplified measurement tools for assessing both cognitive function and eye movement in patients with schizophrenia are lacking. Objective: This study aims to assess the efficacy of a novel tablet-based platform combining cognitive and eye movement measures for classifying schizophrenia. Methods: Forty-four patients with schizophrenia, 67 healthy controls, and 41 patients with other psychiatric diagnoses participated in this study from 10 sites across Japan. A free-viewing eye movement task and 2 cognitive assessment tools (Codebreaker task from the THINC-integrated tool and the CognitiveFunctionTest app) were used for conducting assessments in a 12.9-inch iPad Pro. We performed comparative group and logistic regression analyses for evaluating the diagnostic efficacy of the 3 measures of interest. Results: Cognitive and eye movement measures differed significantly between patients with schizophrenia and healthy controls (all 3 measures; P<.001). The Codebreaker task showed the highest classification effectiveness in distinguishing schizophrenia with an area under the receiver operating characteristic curve of 0.90. Combining cognitive and eye movement measures further improved accuracy with a maximum area under the receiver operating characteristic curve of 0.94. Cognitive measures were more effective in differentiating patients with schizophrenia from healthy controls, whereas eye movement measures better differentiated schizophrenia from other psychiatric conditions. Conclusions: This multisite study demonstrates the feasibility and effectiveness of a tablet-based app for assessing cognitive functioning and eye movements in patients with schizophrenia. 
Our results suggest the potential of tablet-based assessments of cognitive function and eye movement as simple and accessible evaluation tools, which may be useful for future clinical implementation. ", doi="10.2196/56668", url="https://mental.jmir.org/2024/1/e56668", url="http://www.ncbi.nlm.nih.gov/pubmed/38815257" } @Article{info:doi/10.2196/53623, author="Attarha, Mouna and Mahncke, Henry and Merzenich, Michael", title="The Real-World Usability, Feasibility, and Performance Distributions of Deploying a Digital Toolbox of Computerized Assessments to Remotely Evaluate Brain Health: Development and Usability Study", journal="JMIR Form Res", year="2024", month="May", day="13", volume="8", pages="e53623", keywords="web-based cognitive assessment", keywords="remote data collection", keywords="neurocognition", keywords="cognitive profiles", keywords="normative assessment data", keywords="brain health", keywords="cognitive status", keywords="assessment accessibility", abstract="Background: An ongoing global challenge is managing brain health and understanding how performance changes across the lifespan. Objective: We developed and deployed a set of self-administrable, computerized assessments designed to measure key indexes of brain health across the visual and auditory sensory modalities. In this pilot study, we evaluated the usability, feasibility, and performance distributions of the assessments in a home-based, real-world setting without supervision. Methods: Potential participants were untrained users who self-registered on an existing brain training app called BrainHQ. Participants were contacted via a recruitment email and registered remotely to complete a demographics questionnaire and 29 unique assessments on their personal devices. We examined participant engagement, descriptive and psychometric properties of the assessments, associations between performance and self-reported demographic variables, cognitive profiles, and factor loadings. 
Results: Of the 365,782 potential participants contacted via a recruitment email, 414 (0.11\%) registered, of whom 367 (88.6\%) completed at least one assessment and 104 (25.1\%) completed all 29 assessments. Registered participants were, on average, aged 63.6 (SD 14.8; range 13-107) years, mostly female (265/414, 64\%), educated (329/414, 79.5\% with a degree), and White (349/414, 84.3\% White and 48/414, 11.6\% people of color). A total of 72\% (21/29) of the assessments showed no ceiling or floor effects or had easily modifiable score bounds to eliminate these effects. When correlating performance with self-reported demographic variables, 72\% (21/29) of the assessments were sensitive to age, 72\% (21/29) of the assessments were insensitive to gender, 93\% (27/29) of the assessments were insensitive to race and ethnicity, and 93\% (27/29) of the assessments were insensitive to education-based differences. Assessments were brief, with a mean duration of 3 (SD 1.0) minutes per task. The pattern of performance across the assessments revealed distinctive cognitive profiles and loaded onto 4 independent factors. Conclusions: The assessments were both usable and feasible and warrant a full normative study. A digital toolbox of scalable and self-administrable assessments that can evaluate brain health at a glance (and longitudinally) may lead to novel future applications across clinical trials, diagnostics, and performance optimization. ", doi="10.2196/53623", url="https://formative.jmir.org/2024/1/e53623", url="http://www.ncbi.nlm.nih.gov/pubmed/38739916" } @Article{info:doi/10.2196/52691, author="Mohebbi, Fahimeh and Forati, Masoud Amir and Torres, Lucas and deRoon-Cassini, A. Terri and Harris, Jennifer and Tomas, W. Carissa and Mantsch, R. 
John and Ghose, Rina", title="Exploring the Association Between Structural Racism and Mental Health: Geospatial and Machine Learning Analysis", journal="JMIR Public Health Surveill", year="2024", month="May", day="3", volume="10", pages="e52691", keywords="machine learning", keywords="geospatial", keywords="racial disparities", keywords="social determinant of health", keywords="structural racism", keywords="mental health", keywords="health disparities", keywords="deep learning", abstract="Background: Structural racism produces mental health disparities. While studies have examined the impact of individual factors such as poverty and education, the collective contribution of these elements, as manifestations of structural racism, has been less explored. Milwaukee County, Wisconsin, with its racial and socioeconomic diversity, provides a unique context for this multifactorial investigation. Objective: This research aimed to delineate the association between structural racism and mental health disparities in Milwaukee County, using a combination of geospatial and deep learning techniques. We used secondary data sets where all data were aggregated and anonymized before being released by federal agencies. Methods: We compiled 217 georeferenced explanatory variables across domains, initially deliberately excluding race-based factors to focus on nonracial determinants. This approach was designed to reveal the underlying patterns of risk factors contributing to poor mental health, subsequently reintegrating race to assess the effects of racism quantitatively. The variable selection combined tree-based methods (random forest) and conventional techniques, supported by variance inflation factor and Pearson correlation analysis for multicollinearity mitigation. The geographically weighted random forest model was used to investigate spatial heterogeneity and dependence. 
Self-organizing maps, combined with K-means clustering, were used to analyze data from Milwaukee communities, focusing on quantifying the impact of structural racism on the prevalence of poor mental health. Results: While 12 influential factors collectively accounted for 95.11\% of the variability in mental health across communities, the top 6 factors---smoking, poverty, insufficient sleep, lack of health insurance, employment, and age---were particularly impactful. Predominantly, African American neighborhoods were disproportionately affected, which is 2.23 times more likely to encounter high-risk clusters for poor mental health. Conclusions: The findings demonstrate that structural racism shapes mental health disparities, with Black community members disproportionately impacted. The multifaceted methodological approach underscores the value of integrating geospatial analysis and deep learning to understand complex social determinants of mental health. These insights highlight the need for targeted interventions, addressing both individual and systemic factors to mitigate mental health disparities rooted in structural racism. ", doi="10.2196/52691", url="https://publichealth.jmir.org/2024/1/e52691", url="http://www.ncbi.nlm.nih.gov/pubmed/38701436" } @Article{info:doi/10.2196/54622, author="Hurwitz, Eric and Butzin-Dozier, Zachary and Master, Hiral and O'Neil, T. Shawn and Walden, Anita and Holko, Michelle and Patel, C. Rena and Haendel, A. 
Melissa", title="Harnessing Consumer Wearable Digital Biomarkers for Individualized Recognition of Postpartum Depression Using the All of Us Research Program Data Set: Cross-Sectional Study", journal="JMIR Mhealth Uhealth", year="2024", month="May", day="2", volume="12", pages="e54622", keywords="wearable device", keywords="All of Us", keywords="postpartum depression", keywords="machine learning", keywords="Fitbit", keywords="mobile phone", abstract="Background: Postpartum depression (PPD) poses a significant maternal health challenge. The current approach to detecting PPD relies on in-person postpartum visits, which contributes to underdiagnosis. Furthermore, recognizing PPD symptoms can be challenging. Therefore, we explored the potential of using digital biomarkers from consumer wearables for PPD recognition. Objective: The main goal of this study was to showcase the viability of using machine learning (ML) and digital biomarkers related to heart rate, physical activity, and energy expenditure derived from consumer-grade wearables for the recognition of PPD. Methods: Using the All of Us Research Program Registered Tier v6 data set, we performed computational phenotyping of women with and without PPD following childbirth. Intraindividual ML models were developed using digital biomarkers from Fitbit to discern between prepregnancy, pregnancy, postpartum without depression, and postpartum with depression (ie, PPD diagnosis) periods. Models were built using generalized linear models, random forest, support vector machine, and k-nearest neighbor algorithms and evaluated using the $\kappa$ statistic and multiclass area under the receiver operating characteristic curve (mAUC) to determine the algorithm with the best performance. The specificity of our individualized ML approach was confirmed in a cohort of women who gave birth and did not experience PPD. Moreover, we assessed the impact of a previous history of depression on model performance. 
We determined the variable importance for predicting the PPD period using Shapley additive explanations and confirmed the results using a permutation approach. Finally, we compared our individualized ML methodology against a traditional cohort-based ML model for PPD recognition and compared model performance using sensitivity, specificity, precision, recall, and F1-score. Results: Patient cohorts of women with valid Fitbit data who gave birth included <20 with PPD and 39 without PPD. Our results demonstrated that intraindividual models using digital biomarkers discerned among prepregnancy, pregnancy, postpartum without depression, and postpartum with depression (ie, PPD diagnosis) periods, with random forest (mAUC=0.85; $\kappa$=0.80) models outperforming generalized linear models (mAUC=0.82; $\kappa$=0.74), support vector machine (mAUC=0.75; $\kappa$=0.72), and k-nearest neighbor (mAUC=0.74; $\kappa$=0.62). Model performance decreased in women without PPD, illustrating the method's specificity. Previous depression history did not impact the efficacy of the model for PPD recognition. Moreover, we found that the most predictive biomarker of PPD was calories burned during the basal metabolic rate. Finally, individualized models surpassed the performance of a conventional cohort-based model for PPD detection. Conclusions: This research establishes consumer wearables as a promising tool for PPD identification and highlights personalized ML approaches, which could transform early disease detection strategies. 
", doi="10.2196/54622", url="https://mhealth.jmir.org/2024/1/e54622", url="http://www.ncbi.nlm.nih.gov/pubmed/38696234" } @Article{info:doi/10.2196/50259, author="Cho, Kwangsu and Kim, Minah and Cho, Youngeun and Hur, Ji-Won and Kim, Hyung Do and Park, Seonghyeon and Park, Sunghyun and Jang, Moonyoung and Lee, Chang-Gun and Kwon, Soo Jun", title="Digital Phenotypes for Early Detection of Internet Gaming Disorder in Adolescent Students: Explorative Data-Driven Study", journal="JMIR Ment Health", year="2024", month="Apr", day="29", volume="11", pages="e50259", keywords="adolescents", keywords="digital biomarkers", keywords="digital phenotyping", keywords="digital psychiatry", keywords="early detection", keywords="IGD", keywords="internet gaming disorder", keywords="pediatric psychiatry", keywords="proactive medicine", keywords="secondary school", keywords="universal screening", abstract="Background: Limited awareness, social stigma, and access to mental health professionals hinder early detection and intervention of internet gaming disorder (IGD), which has emerged as a significant concern among young individuals. Prevalence estimates vary between 0.7\% and 15.6\%, and its recognition in the International Classification of Diseases, 11th Revision and Diagnostic and Statistical Manual of Mental Disorders, 5th Edition underscores its impact on academic functioning, social isolation, and mental health challenges. Objective: This study aimed to uncover digital phenotypes for the early detection of IGD among adolescents in learning settings. By leveraging sensor data collected from student tablets, the overarching objective is to incorporate these digital indicators into daily school activities to establish these markers as a mental health screening tool, facilitating the early identification and intervention for IGD cases. Methods: A total of 168 voluntary participants were engaged, consisting of 85 students with IGD and 83 students without IGD. 
There were 53\% (89/168) female and 47\% (79/168) male individuals, all within the age range of 13-14 years. The individual students learned their Korean literature and mathematics lessons on their personal tablets, with sensor data being automatically collected. Multiple regression with bootstrapping and multivariate ANOVA were used, prioritizing interpretability over predictability, for cross-validation purposes. Results: A negative correlation between IGD Scale (IGDS) scores and learning outcomes emerged ($r_{166}$=--0.15; P=.047), suggesting that higher IGDS scores were associated with lower learning outcomes. Multiple regression identified 5 key indicators linked to IGD, explaining 23\% of the IGDS score variance: stroke acceleration ($\beta$=.33; P<.001), time interval between keys ($\beta$=--0.26; P=.01), word spacing ($\beta$=--0.25; P<.001), deletion ($\beta$=--0.24; P<.001), and horizontal length of strokes ($\beta$=0.21; P=.02). Multivariate ANOVA cross-validated these findings, revealing significant differences in digital phenotypes between potential IGD and non-IGD groups. The average effect size, measured by Cohen d, across the indicators was 0.40, indicating a moderate effect. Notable distinctions included faster stroke acceleration (Cohen d=0.68; P<.001), reduced word spacing (Cohen d=.57; P<.001), decreased deletion behavior (Cohen d=0.33; P=.04), and longer horizontal strokes (Cohen d=0.34; P=.03) in students with potential IGD compared to their counterparts without IGD. Conclusions: The aggregated findings show a negative correlation between IGD and learning performance, highlighting the effectiveness of digital markers in detecting IGD. This underscores the importance of digital phenotyping in advancing mental health care within educational settings. As schools adopt a 1-device-per-student framework, digital phenotyping emerges as a promising early detection method for IGD. 
This shift could transform clinical approaches from reactive to proactive measures. ", doi="10.2196/50259", url="https://mental.jmir.org/2024/1/e50259", url="http://www.ncbi.nlm.nih.gov/pubmed/38683658" } @Article{info:doi/10.2196/51540, author="Ahmed, Sabbir Md and Hasan, Tanvir and Islam, Salekul and Ahmed, Nova", title="Investigating Rhythmicity in App Usage to Predict Depressive Symptoms: Protocol for Personalized Framework Development and Validation Through a Countrywide Study", journal="JMIR Res Protoc", year="2024", month="Apr", day="24", volume="13", pages="e51540", keywords="depressive symptoms", keywords="app usage rhythm", keywords="behavioral markers", keywords="personalization", keywords="multitask learning framework", abstract="Background: Understanding a student's depressive symptoms could facilitate significantly more precise diagnosis and treatment. However, few studies have focused on depressive symptom prediction through unobtrusive systems, and these studies are limited by small sample sizes, low performance, and the requirement for higher resources. In addition, research has not explored whether statistically significant rhythms based on different app usage behavioral markers (eg, app usage sessions) exist that could be useful in finding subtle differences to predict with higher accuracy like the models based on rhythms of physiological data. Objective: The main objective of this study is to explore whether there exist statistically significant rhythms in resource-insensitive app usage behavioral markers and predict depressive symptoms through these marker-based rhythmic features. Another objective of this study is to understand whether there is a potential link between rhythmic features and depressive symptoms. Methods: Through a countrywide study, we collected 2952 students' raw app usage behavioral data and responses to the 9 depressive symptoms in the 9-item Patient Health Questionnaire (PHQ-9). 
The behavioral data were retrieved through our developed app, which was previously used in our pilot studies in Bangladesh on different research problems. To explore whether there is a rhythm based on app usage data, we will conduct a zero-amplitude test. In addition, we will develop a cosinor model for each participant to extract rhythmic parameters (eg, acrophase). In addition, to obtain a comprehensive picture of the rhythms, we will explore nonparametric rhythmic features (eg, interdaily stability). Furthermore, we will conduct regression analysis to understand the association of rhythmic features with depressive symptoms. Finally, we will develop a personalized multitask learning (MTL) framework to predict symptoms through rhythmic features. Results: After applying inclusion criteria (eg, having app usage data of at least 2 days to explore rhythmicity), we kept the data of 2902 (98.31\%) students for analysis, with 24.48 million app usage events, and 7 days' app usage of 2849 (98.17\%) students. The students are from all 8 divisions of Bangladesh, both public and private universities (19 different universities and 52 different departments). We are analyzing the data and will publish the findings in a peer-reviewed publication. Conclusions: Having an in-depth understanding of app usage rhythms and their connection with depressive symptoms through a countrywide study can significantly help health care professionals and researchers better understand depressed students and may create possibilities for using app usage--based rhythms for intervention. In addition, the MTL framework based on app usage rhythmic features may more accurately predict depressive symptoms due to the rhythms' capability to find subtle differences. 
International Registered Report Identifier (IRRID): DERR1-10.2196/51540 ", doi="10.2196/51540", url="https://www.researchprotocols.org/2024/1/e51540", url="http://www.ncbi.nlm.nih.gov/pubmed/38657238" } @Article{info:doi/10.2196/47428, author="Zhu, Yue and Zhang, Ran and Yin, Shuluo and Sun, Yihui and Womer, Fay and Liu, Rongxun and Zeng, Sheng and Zhang, Xizhe and Wang, Fei", title="Digital Dietary Behaviors in Individuals With Depression: Real-World Behavioral Observation", journal="JMIR Public Health Surveill", year="2024", month="Apr", day="22", volume="10", pages="e47428", keywords="dietary behaviors", keywords="digital marker", keywords="depression", keywords="mental health", keywords="appetite disturbance", keywords="behavioral monitoring", keywords="eating pattern", keywords="electronic record", keywords="digital health", keywords="behavioral", keywords="surveillance", abstract="Background: Depression is often accompanied by changes in behavior, including dietary behaviors. The relationship between dietary behaviors and depression has been widely studied, yet previous research has relied on self-reported data which is subject to recall bias. Electronic device--based behavioral monitoring offers the potential for objective, real-time data collection of a large amount of continuous, long-term behavior data in naturalistic settings. Objective: The study aims to characterize digital dietary behaviors in depression, and to determine whether these behaviors could be used to detect depression. Methods: A total of 3310 students (2222 healthy controls [HCs], 916 with mild depression, and 172 with moderate-severe depression) were recruited for the study of their dietary behaviors via electronic records over a 1-month period, and depression severity was assessed in the middle of the month. 
The differences in dietary behaviors across the HCs, mild depression, and moderate-severe depression were determined by ANCOVA (analyses of covariance) with age, gender, BMI, and educational level as covariates. Multivariate logistic regression analyses were used to examine the association between dietary behaviors and depression severity. Support vector machine analysis was used to determine whether changes in dietary behaviors could detect mild and moderate-severe depression. Results: The study found that individuals with moderate-severe depression had more irregular eating patterns, more fluctuated feeding times, spent more money on dinner, less diverse food choices, as well as eating breakfast less frequently, and preferred to eat only lunch and dinner, compared with HCs. Moderate-severe depression was found to be negatively associated with the daily 3 regular meals pattern (breakfast-lunch-dinner pattern; OR 0.467, 95\% CI 0.239-0.912), and mild depression was positively associated with daily lunch and dinner pattern (OR 1.460, 95\% CI 1.016-2.100). These changes in digital dietary behaviors were able to detect mild and moderate-severe depression (accuracy=0.53, precision=0.60), with better accuracy for detecting moderate-severe depression (accuracy=0.67, precision=0.64). Conclusions: This is the first study to develop a profile of changes in digital dietary behaviors in individuals with depression using real-world behavioral monitoring. The results suggest that digital markers may be a promising approach for detecting depression. 
", doi="10.2196/47428", url="https://publichealth.jmir.org/2024/1/e47428", url="http://www.ncbi.nlm.nih.gov/pubmed/38648087" } @Article{info:doi/10.2196/56883, author="McMurray, Josephine and Levy, AnneMarie and Pang, Wei and Holyoke, Paul", title="Psychometric Evaluation of a Tablet-Based Tool to Detect Mild Cognitive Impairment in Older Adults: Mixed Methods Study", journal="J Med Internet Res", year="2024", month="Apr", day="19", volume="26", pages="e56883", keywords="cognitive dysfunction", keywords="dementia neuropsychological tests", keywords="evaluation study", keywords="technology", keywords="aged", keywords="mobile phone", abstract="Background: With the rapid aging of the global population, the prevalence of mild cognitive impairment (MCI) and dementia is anticipated to surge worldwide. MCI serves as an intermediary stage between normal aging and dementia, necessitating more sensitive and effective screening tools for early identification and intervention. The BrainFx SCREEN is a novel digital tool designed to assess cognitive impairment. This study evaluated its efficacy as a screening tool for MCI in primary care settings, particularly in the context of an aging population and the growing integration of digital health solutions. Objective: The primary objective was to assess the validity, reliability, and applicability of the BrainFx SCREEN (hereafter, the SCREEN) for MCI screening in a primary care context. We conducted an exploratory study comparing the SCREEN with an established screening tool, the Quick Mild Cognitive Impairment (Qmci) screen. Methods: A concurrent mixed methods, prospective study using a quasi-experimental design was conducted with 147 participants from 5 primary care Family Health Teams (FHTs; characterized by multidisciplinary practice and capitated funding) across southwestern Ontario, Canada. Participants included health care practitioners, patients, and FHT administrative executives. 
Individuals aged $\geq$55 years with no history of MCI or diagnosis of dementia rostered in a participating FHT were eligible to participate. Participants were screened using both the SCREEN and Qmci. The study also incorporated the Geriatric Anxiety Scale--10 to assess general anxiety levels at each cognitive screening. The SCREEN's scoring was compared against that of the Qmci and the clinical judgment of health care professionals. Statistical analyses included sensitivity, specificity, internal consistency, and test-retest reliability assessments. Results: The study found that the SCREEN's longer administration time and complex scoring algorithm, which is proprietary and unavailable for independent analysis, presented challenges. Its internal consistency, indicated by a Cronbach $\alpha$ of 0.63, was below the acceptable threshold. The test-retest reliability also showed limitations, with moderate intraclass correlation coefficient (0.54) and inadequate $\kappa$ (0.15) values. Sensitivity and specificity were consistent (63.25\% and 74.07\%, respectively) between cross-tabulation and discrepant analysis. In addition, the study faced limitations due to its demographic skew (96/147, 65.3\% female, well-educated participants), the absence of a comprehensive gold standard for MCI diagnosis, and financial constraints limiting the inclusion of confirmatory neuropsychological testing. Conclusions: The SCREEN, in its current form, does not meet the necessary criteria for an optimal MCI screening tool in primary care settings, primarily due to its longer administration time and lower reliability. As the number of digital health technologies increases and evolves, further testing and refinement of tools such as the SCREEN are essential to ensure their efficacy and reliability in real-world clinical settings. This study advocates for continued research in this rapidly advancing field to better serve the aging population. 
International Registered Report Identifier (IRRID): RR2-10.2196/25520 ", doi="10.2196/56883", url="https://www.jmir.org/2024/1/e56883", url="http://www.ncbi.nlm.nih.gov/pubmed/38640480" } @Article{info:doi/10.2196/50907, author="Bilder, A. Deborah and Mthembu, Mariah and Worsham, Whitney and Aguayo, Patricia and Knight, R. Jacob and Deng, W. Steven and Singh, P. Tejinder and Davis, John", title="Developing and Implementing a Web-Based Branching Logic Survey to Support Psychiatric Crisis Evaluations of Individuals With Developmental Disabilities: Qualitative Study and Evaluation of Validity", journal="JMIR Ment Health", year="2024", month="Mar", day="29", volume="11", pages="e50907", keywords="developmental disabilities", keywords="disruptive behavior", keywords="psychiatric comorbidity", keywords="web-based", keywords="psychiatric crisis", keywords="disability", keywords="mental health", keywords="behavioral crises", keywords="intervention", keywords="general population", keywords="screening", keywords="accuracy", keywords="mood disorder", keywords="sources of distress", keywords="autism", keywords="intellectual disability", abstract="Background: Individuals with developmental disabilities (DD) experience increased rates of emotional and behavioral crises that necessitate assessment and intervention. Psychiatric disorders can contribute to crises; however, screening measures developed for the general population are inadequate for those with DD. Medical conditions can exacerbate crises and merit evaluation. Screening tools using checklist formats, even when designed for DD, are too limited in depth and scope for crisis assessments. The Sources of Distress survey implements a web-based branching logic format to screen for common psychiatric and medical conditions experienced by individuals with DD by querying caregiver knowledge and observations. 
Objective: This paper aims to (1) describe the initial survey development, (2) report on focus group and expert review processes and findings, and (3) present results from the survey's clinical implementation and evaluation of validity. Methods: Sources of Distress was reviewed by focus groups and clinical experts; this feedback informed survey revisions. The survey was subsequently implemented in clinical settings to augment providers' psychiatric and medical history taking. Informal and formal consults followed the completion of Sources of Distress for a subset of individuals. A records review was performed to identify working diagnoses established during these consults. Results: Focus group members (n=17) expressed positive feedback overall about the survey's content and provided specific recommendations to add categories and items. The survey was completed for 231 individuals with DD in the clinical setting (n=161, 69.7\% men and boys; mean age 17.7, SD 10.3; range 2-65 years). Consults were performed for 149 individuals (n=102, 68.5\% men and boys; mean age 18.9, SD 10.9 years), generating working diagnoses to compare survey screening results. Sources of Distress accuracy rates were 91\% (95\% CI 85\%-95\%) for posttraumatic stress disorder, 87\% (95\% CI 81\%-92\%) for anxiety, 87\% (95\% CI 81\%-92\%) for episodic expansive mood and bipolar disorder, 82\% (95\% CI 75\%-87\%) for psychotic disorder, 79\% (95\% CI 71\%-85\%) for unipolar depression, and 76\% (95\% CI 69\%-82\%) for attention-deficit/hyperactivity disorder. While no specific survey items or screening algorithm existed for unspecified mood disorder and disruptive mood dysregulation disorder, these conditions were caregiver-reported and working diagnoses for 11.7\% (27/231) and 16.8\% (25/149) of individuals, respectively. Conclusions: Caregivers described Sources of Distress as an acceptable tool for sharing their knowledge and insights about individuals with DD who present in crisis. 
As a screening tool, this survey demonstrates good accuracy. However, better differentiation among mood disorders is needed, including the addition of items and screening algorithm for unspecified mood disorder and disruptive mood dysregulation disorder. Additional validation efforts are necessary to include a more geographically diverse population and reevaluate mood disorder differentiation. Future study is merited to investigate the survey's impact on the psychiatric and medical management of distress in individuals with DD. ", doi="10.2196/50907", url="https://mental.jmir.org/2024/1/e50907", url="http://www.ncbi.nlm.nih.gov/pubmed/38551644" } @Article{info:doi/10.2196/48894, author="Worthington, A. Michelle and Christie, H. Richard and Masino, J. Aaron and Kark, M. Sarah", title="Identifying Unmet Needs in Major Depressive Disorder Using a Computer-Assisted Alternative to Conventional Thematic Analysis: Qualitative Interview Study With Psychiatrists", journal="JMIR Form Res", year="2024", month="Mar", day="1", volume="8", pages="e48894", keywords="consumer health informatics", keywords="interview", keywords="major depressive disorder", keywords="medical informatics applications", keywords="needs assessment", keywords="psychiatry and psychology", abstract="Background: The development of digital health tools that are clinically relevant requires a deep understanding of the unmet needs of stakeholders, such as clinicians and patients. One way to reveal unforeseen stakeholder needs is through qualitative research, including stakeholder interviews. However, conventional qualitative data analytical approaches are time-consuming and resource-intensive, rendering them untenable in many industry settings where digital tools are conceived of and developed. Thus, a more time-efficient process for identifying clinically relevant target needs for digital tool development is needed. 
Objective: The objective of this study was to address the need for an accessible, simple, and time-efficient alternative to conventional thematic analysis of qualitative research data through text analysis of semistructured interview transcripts. In addition, we sought to identify important themes across expert psychiatrist advisor interview transcripts to efficiently reveal areas for the development of digital tools that target unmet clinical needs. Methods: We conducted 10 (1-hour-long) semistructured interviews with US-based psychiatrists treating major depressive disorder. The interviews were conducted using an interview guide that comprised open-ended questions predesigned to (1) understand the clinicians' experience of the care management process and (2) understand the clinicians' perceptions of the patients' experience of the care management process. We then implemented a hybrid analytical approach that combines computer-assisted text analyses with deductive analyses as an alternative to conventional qualitative thematic analysis to identify word combination frequencies, content categories, and broad themes characterizing unmet needs in the care management process. Results: Using this hybrid computer-assisted analytical approach, we were able to identify several key areas that are of interest to clinicians in the context of major depressive disorder and would be appropriate targets for digital tool development. Conclusions: A hybrid approach to qualitative research combining computer-assisted techniques with deductive techniques provides a time-efficient approach to identifying unmet needs, targets, and relevant themes to inform digital tool development. This can increase the likelihood that useful and practical tools are built and implemented to ultimately improve health outcomes for patients. 
", doi="10.2196/48894", url="https://formative.jmir.org/2024/1/e48894", url="http://www.ncbi.nlm.nih.gov/pubmed/38427407" } @Article{info:doi/10.2196/51749, author="Kurokawa, Shunya and Nomura, Kensuke and Hosogane, Nana and Nagasawa, Takashi and Kawade, Yuko and Matsumoto, Yu and Morinaga, Shuichi and Kaise, Yuriko and Higuchi, Ayana and Goto, Akiko and Inada, Naoko and Kodaira, Masaki and Kishimoto, Taishiro", title="Reliability of Telepsychiatry Assessments Using the Attention-Deficit/Hyperactivity Disorder Rating Scale-IV for Children With Neurodevelopmental Disorders and Their Caregivers: Randomized Feasibility Study", journal="J Med Internet Res", year="2024", month="Feb", day="19", volume="26", pages="e51749", keywords="acceptability", keywords="ADHD", keywords="application", keywords="attention-deficit/hyperactivity disorder", keywords="autism spectrum disorders", keywords="autism", keywords="child", keywords="children", keywords="diagnosis", keywords="management", keywords="neurodevelopmental disorder", keywords="neurodevelopmental", keywords="psychiatrists", keywords="reliability", keywords="telepsychitatry", abstract="Background: Given the global shortage of child psychiatrists and barriers to specialized care, remote assessment is a promising alternative for diagnosing and managing attention-deficit/hyperactivity disorder (ADHD). However, only a few studies have validated the accuracy and acceptability of these remote methods. Objective: This study aimed to test the agreement between remote and face-to-face assessments. Methods: Patients aged between 6 and 17 years with confirmed Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition diagnoses of ADHD or autism spectrum disorder (ASD) were recruited from multiple institutions. In a randomized order, participants underwent 2 evaluations, face-to-face and remotely, with distinct evaluators administering the ADHD Rating Scale-IV (ADHD-RS-IV). 
Intraclass correlation coefficient (ICC) was used to assess the reliability of face-to-face and remote assessments. Results: The participants included 74 Japanese children aged between 6 and 16 years who were primarily diagnosed with ADHD (43/74, 58\%) or ASD (31/74, 42\%). A total of 22 (30\%) children were diagnosed with both conditions. The ADHD-RS-IV ICCs between face-to-face and remote assessments showed ``substantial'' agreement in the total ADHD-RS-IV score (ICC=0.769, 95\% CI 0.654-0.849; P<.001) according to the Landis and Koch criteria. The ICC in patients with ADHD showed ``almost perfect'' agreement (ICC=0.816, 95\% CI 0.683-0.897; P<.001), whereas in patients with ASD, it showed ``substantial'' agreement (ICC=0.674, 95\% CI 0.420-0.831; P<.001), indicating the high reliability of both methods across both conditions. Conclusions: Our study validated the feasibility and reliability of remote ADHD testing, which has potential benefits such as reduced hospital visits and time-saving effects. Our results highlight the potential of telemedicine in resource-limited areas, clinical trials, and treatment evaluations, necessitating further studies to explore its broader application. 
Trial Registration: UMIN Clinical Trials Registry UMIN000039860; http://tinyurl.com/yp34x6kh ", doi="10.2196/51749", url="https://www.jmir.org/2024/1/e51749", url="http://www.ncbi.nlm.nih.gov/pubmed/38373022" } @Article{info:doi/10.2196/52660, author="Jaiswal, Aditi and Washington, Peter", title="Using \#ActuallyAutistic on Twitter for Precision Diagnosis of Autism Spectrum Disorder: Machine Learning Study", journal="JMIR Form Res", year="2024", month="Feb", day="14", volume="8", pages="e52660", keywords="autism", keywords="autism spectrum disorder", keywords="machine learning", keywords="natural language processing", keywords="public health", keywords="sentiment analysis", keywords="social media analysis", keywords="Twitter", abstract="Background: The increasing use of social media platforms has given rise to an unprecedented surge in user-generated content, with millions of individuals publicly sharing their thoughts, experiences, and health-related information. Social media can serve as a useful means to study and understand public health. Twitter (subsequently rebranded as ``X'') is one such social media platform that has proven to be a valuable source of rich information for both the general public and health officials. We conducted the first study applying Twitter data mining to autism screening. Objective: We aimed to study the feasibility of autism screening from Twitter data and discuss the ethical implications of such models. Methods: We developed a machine learning model to attempt to distinguish individuals with autism from their neurotypical peers based on the textual patterns from their public communications on Twitter. We collected 6,515,470 tweets from users' self-identification with autism using ``\#ActuallyAutistic'' and a separate control group. To construct the data set, we targeted English-language tweets using the search query ``\#ActuallyAutistic'' posted from January 1, 2014 to December 31, 2022. 
We encrypted all user IDs and stripped the tweets of identifiable information such as the associated email address prior to analysis. From these tweets, we identified unique users who used keywords such as ``autism'' OR ``autistic'' OR ``neurodiverse'' in their profile description and collected all the tweets from their timelines. To build the control group data set, we formulated a search query excluding the hashtag ``\#ActuallyAutistic'' and collected 1000 tweets per day during the same time period. We trained a word2vec model and an attention-based, bidirectional long short-term memory model to validate the performance of per-tweet and per-profile classification models. We deleted the data set and the models after our analysis. Results: Our tweet classifier reached a 73\% accuracy, a 0.728 area under the receiver operating characteristic curve score, and an 0.71 F1-score using word2vec representations fed into a logistic regression model, while the user profile classifier achieved an 0.78 area under the receiver operating characteristic curve score and an F1-score of 0.805 using an attention-based, bidirectional long short-term memory model. Conclusions: We have shown that it is feasible to train machine learning models using social media data to predict use of the \#ActuallyAutistic hashtag, an imperfect proxy for self-reported autism. While analyzing textual differences in naturalistic text has the potential to help clinicians screen for autism, there remain ethical questions that must be addressed for such research to move forward and to translate into the real world. While machine learning has the potential to improve behavioral research, there are still a plethora of ethical issues in digital phenotyping studies using social media with respect to user consent of marginalized populations. Achieving this requires a more inclusive approach during the model development process that involves the autistic community directly in the ideation and consent processes. 
", doi="10.2196/52660", url="https://formative.jmir.org/2024/1/e52660", url="http://www.ncbi.nlm.nih.gov/pubmed/38354045" } @Article{info:doi/10.2196/52205, author="Jaiswal, Aditi and Kruiper, Ruben and Rasool, Abdur and Nandkeolyar, Aayush and Wall, P. Dennis and Washington, Peter", title="Digitally Diagnosing Multiple Developmental Delays Using Crowdsourcing Fused With Machine Learning: Protocol for a Human-in-the-Loop Machine Learning Study", journal="JMIR Res Protoc", year="2024", month="Feb", day="8", volume="13", pages="e52205", keywords="machine learning", keywords="crowdsourcing", keywords="autism spectrum disorder", keywords="ASD", keywords="attention-deficit/hyperactivity disorder", keywords="ADHD", keywords="precision health", abstract="Background: A considerable number of minors in the United States are diagnosed with developmental or psychiatric conditions, potentially influenced by underdiagnosis factors such as cost, distance, and clinician availability. Despite the potential of digital phenotyping tools with machine learning (ML) approaches to expedite diagnoses and enhance diagnostic services for pediatric psychiatric conditions, existing methods face limitations because they use a limited set of social features for prediction tasks and focus on a single binary prediction, resulting in uncertain accuracies. Objective: This study aims to propose the development of a gamified web system for data collection, followed by a fusion of novel crowdsourcing algorithms with ML behavioral feature extraction approaches to simultaneously predict diagnoses of autism spectrum disorder and attention-deficit/hyperactivity disorder in a precise and specific manner. 
Methods: The proposed pipeline will consist of (1) gamified web applications to curate videos of social interactions adaptively based on the needs of the diagnostic system, (2) behavioral feature extraction techniques consisting of automated ML methods and novel crowdsourcing algorithms, and (3) the development of ML models that classify several conditions simultaneously and that adaptively request additional information based on uncertainties about the data. Results: A preliminary version of the web interface has been implemented, and a prior feature selection method has highlighted a core set of behavioral features that can be targeted through the proposed gamified approach. Conclusions: The prospect for high reward stems from the possibility of creating the first artificial intelligence--powered tool that can identify complex social behaviors well enough to distinguish conditions with nuanced differentiators such as autism spectrum disorder and attention-deficit/hyperactivity disorder. International Registered Report Identifier (IRRID): PRR1-10.2196/52205 ", doi="10.2196/52205", url="https://www.researchprotocols.org/2024/1/e52205", url="http://www.ncbi.nlm.nih.gov/pubmed/38329783" } @Article{info:doi/10.2196/52096, author="Dryden, M. Eileen and Anwar, Chitra and Conti, Jennifer and Boudreau, H. Jacqueline and Kennedy, A. Meaghan and Hung, W. William and Nearing, A. Kathryn and Pimentel, B. 
Camilla and Moo, Lauren", title="The Development and Use of a New Visual Tool (REVISIT) to Support Participant Recall: Web-Based Interview Study Among Older Adults", journal="JMIR Form Res", year="2024", month="Feb", day="1", volume="8", pages="e52096", keywords="qualitative interviews", keywords="visual recall aid", keywords="older adults", keywords="health services research", keywords="web-based methods", keywords="visual tool", keywords="recall", keywords="qualitative interview", keywords="experience", keywords="perspective", keywords="motivation", keywords="patient", keywords="recall capacity", keywords="medical information", keywords="visual appointment", keywords="geriatric", keywords="older people", keywords="telemedicine", keywords="videoconference", keywords="e-consultation", keywords="e-medicine", keywords="internet medicine", keywords="REVISIT", keywords="Remembering Healthcare Encounters Visually and Interactively", keywords="mobile phone", abstract="Background: Qualitative health services research often relies on semistructured or in-depth interviews to develop a deeper understanding of patient experiences, motivations, and perspectives. The quality of data gathered is contingent upon a patient's recall capacity; yet, studies have shown that recall of medical information is low. Threats to generating rich and detailed interview data may be more prevalent when interviewing older adults. Objective: We developed and studied the feasibility of using a tool, Remembering Healthcare Encounters Visually and Interactively (REVISIT), which has been created to aid the recall of a specific telemedicine encounter to provide health services research teams with a visual tool, to improve qualitative interviews with older adults. Methods: The REVISIT visual appointment summary was developed to facilitate web-based interviews with our participants as part of an evaluation of a geriatric telemedicine program. 
Our primary aims were to aid participant recall, maintain focus on the index visit, and establish a shared understanding of the visit between participants and interviewers. The authors' experiences and observations developing REVISIT and using it during videoconference interviews (N=16) were systematically documented and synthesized. We discuss these experiences with REVISIT and suggest considerations for broader implementation and future research to expand upon this preliminary work. Results: REVISIT enhanced the interview process by providing a focus and catalyst for discussion and supporting rapport-building with participants. REVISIT appeared to support older patients' and caregivers' recollection of a clinical visit, helping them to share additional details about their experience. REVISIT was difficult to read for some participants, however, and could not be used for phone interviews. Conclusions: REVISIT is a promising tool to enhance the quality of data collected during interviews with older, rural adults and caregivers about a health care encounter. This novel tool may aid recall of health care experiences for those groups for whom it may be more challenging to collect accurate, rich qualitative data (eg, those with cognitive impairment or complex medical care), allowing health services research to include more diverse patient experiences. ", doi="10.2196/52096", url="https://formative.jmir.org/2024/1/e52096", url="http://www.ncbi.nlm.nih.gov/pubmed/38300691" } @Article{info:doi/10.2196/50738, author="Benacek, Jiri and Lawal, Nimotalai and Ong, Tommy and Tomasik, Jakub and Martin-Key, A. Nayra and Funnell, L. 
Erin and Barton-Owen, Giles and Olmert, Tony and Cowell, Dan and Bahn, Sabine", title="Identification of Predictors of Mood Disorder Misdiagnosis and Subsequent Help-Seeking Behavior in Individuals With Depressive Symptoms: Gradient-Boosted Tree Machine Learning Approach", journal="JMIR Ment Health", year="2024", month="Jan", day="11", volume="11", pages="e50738", keywords="misdiagnosis", keywords="help-seeking", keywords="gradient-boosted trees", keywords="machine learning", keywords="depression", keywords="bipolar disorder", keywords="diagnose", keywords="diagnosis", keywords="mood", keywords="mental health", keywords="mental disorder", keywords="mental disorders", keywords="depressive", keywords="predict", keywords="predictive", keywords="prediction", keywords="depressed", keywords="algorithm", keywords="algorithms", abstract="Background: Misdiagnosis and delayed help-seeking cause significant burden for individuals with mood disorders such as major depressive disorder and bipolar disorder. Misdiagnosis can lead to inappropriate treatment, while delayed help-seeking can result in more severe symptoms, functional impairment, and poor treatment response. Such challenges are common in individuals with major depressive disorder and bipolar disorder due to the overlap of symptoms with other mental and physical health conditions, as well as stigma and insufficient understanding of these disorders. Objective: In this study, we aimed to identify factors that may contribute to mood disorder misdiagnosis and delayed help-seeking. Methods: Participants with current depressive symptoms were recruited online and data were collected using an extensive digital mental health questionnaire, with the World Health Organization World Mental Health Composite International Diagnostic Interview delivered via telephone. 
A series of predictive gradient-boosted tree algorithms were trained and validated to identify the most important predictors of misdiagnosis and subsequent help-seeking in misdiagnosed individuals. Results: The analysis included data from 924 symptomatic individuals for predicting misdiagnosis and from a subset of 379 misdiagnosed participants who provided follow-up information when predicting help-seeking. Models achieved good predictive power, with area under the receiver operating characteristic curve of 0.75 and 0.71 for misdiagnosis and help-seeking, respectively. The most predictive features with respect to misdiagnosis were high severity of depressed mood, instability of self-image, the involvement of a psychiatrist in diagnosing depression, higher age at depression diagnosis, and reckless spending. Regarding help-seeking behavior, the strongest predictors included shorter time elapsed since last speaking to a general practitioner about mental health, sleep problems disrupting daily tasks, taking antidepressant medication, and being diagnosed with depression at younger ages. Conclusions: This study provides a novel, machine learning--based approach to understand the interplay of factors that may contribute to the misdiagnosis and subsequent help-seeking in patients experiencing low mood. The present findings can inform the development of targeted interventions to improve early detection and appropriate treatment of individuals with mood disorders. 
", doi="10.2196/50738", url="https://mental.jmir.org/2024/1/e50738", url="http://www.ncbi.nlm.nih.gov/pubmed/38206660" } @Article{info:doi/10.2196/53365, author="Tumaliuan, Beatriz Faye and Grepo, Lorelie and Jalao, Rex Eugene", title="Development of Depression Data Sets and a Language Model for Depression Detection: Mixed Methods Study", journal="JMIR Data", year="2024", month="Sep", day="4", volume="5", pages="e53365", keywords="depression data set", keywords="depression detection", keywords="social media", keywords="natural language processing", keywords="Filipino", abstract="Background: Depression detection in social media has gained attention in recent years with the help of natural language processing (NLP) techniques. Because of the low-resource standing of Filipino depression data, valid data sets need to be created to aid various machine learning techniques in depression detection classification tasks. Objective: The primary objective is to build a depression corpus of Philippine Twitter users who were clinically diagnosed with depression by mental health professionals and develop from this a corpus of depression symptoms that can later serve as a baseline for predicting depression symptoms in the Filipino and English languages. Methods: The proposed process included the implementation of clinical screening methods with the help of clinical psychologists in the recruitment of study participants who were young adults aged 18 to 30 years. A total of 72 participants were assessed by clinical psychologists and provided their Twitter data: 60 with depression and 12 with no depression. Six participants provided 2 Twitter accounts each, making 78 Twitter accounts. A data set was developed consisting of depression symptom--annotated tweets with 13 depression categories. These were created through manual annotation in a process constructed, guided, and validated by clinical psychologists. 
Results: Three annotators completed the process for approximately 79,614 tweets, resulting in a substantial interannotator agreement score of 0.735 using Fleiss $\kappa$ and a 95.59\% psychologist validation score. A word2vec language model was developed using Filipino and English data sets to create a 300-feature word embedding that can be used in various machine learning techniques for NLP. Conclusions: This study contributes to depression research by constructing depression data sets from social media to aid NLP in the Philippine setting. These 2 validated data sets can be significant in user detection or tweet-level detection of depression in young adults in further studies. ", doi="10.2196/53365", url="https://data.jmir.org/2024/1/e53365" } @Article{info:doi/10.2196/42886, author="Ding, Huitong and Mandapati, Amiya and Karjadi, Cody and Ang, Alvin Ting Fang and Lu, Sophia and Miao, Xiao and Glass, James and Au, Rhoda and Lin, Honghuang", title="Association Between Acoustic Features and Neuropsychological Test Performance in the Framingham Heart Study: Observational Study", journal="J Med Internet Res", year="2022", month="Dec", day="22", volume="24", number="12", pages="e42886", keywords="mild cognitive impairment", keywords="digital voice", keywords="neuropsychological test", keywords="association", keywords="prediction", abstract="Background: Human voice has increasingly been recognized as an effective indicator for the detection of cognitive disorders. However, the association of acoustic features with specific cognitive functions and mild cognitive impairment (MCI) has yet to be evaluated in a large community-based population. Objective: This study aimed to investigate the association between acoustic features and neuropsychological (NP) tests across multiple cognitive domains and evaluate the added predictive power of acoustic composite scores for the classification of MCI. 
Methods: This study included participants without dementia from the Framingham Heart Study, a large community-based cohort with longitudinal surveillance for incident dementia. For each participant, 65 low-level acoustic descriptors were derived from voice recordings of NP test administration. The associations between individual acoustic descriptors and 18 NP tests were assessed with linear mixed-effect models adjusted for age, sex, and education. Acoustic composite scores were then built by combining acoustic features significantly associated with NP tests. The added prediction power of acoustic composite scores for prevalent and incident MCI was also evaluated. Results: The study included 7874 voice recordings from 4950 participants (age: mean 62, SD 14 years; 4336/7874, 55.07\% women), of whom 453 were diagnosed with MCI. In all, 8 NP tests were associated with more than 15 acoustic features after adjusting for multiple testing. Additionally, 4 of the acoustic composite scores were significantly associated with prevalent MCI and 7 were associated with incident MCI. The acoustic composite scores can increase the area under the curve of the baseline model for MCI prediction from 0.712 to 0.755. Conclusions: Multiple acoustic features are significantly associated with NP test performance and MCI, which can potentially be used as digital biomarkers for early cognitive impairment monitoring. 
", doi="10.2196/42886", url="https://www.jmir.org/2022/12/e42886", url="http://www.ncbi.nlm.nih.gov/pubmed/36548029" } @Article{info:doi/10.2196/41003, author="Acien, Alejandro and Morales, Aythami and Vera-Rodriguez, Ruben and Fierrez, Julian and Mondesire-Crump, Ijah and Arroyo-Gallego, Teresa", title="Detection of Mental Fatigue in the General Population: Feasibility Study of Keystroke Dynamics as a Real-world Biomarker", journal="JMIR Biomed Eng", year="2022", month="Nov", day="21", volume="7", number="2", pages="e41003", keywords="fatigue", keywords="keystroke", keywords="biometrics", keywords="digital biomarker", keywords="TypeNet", keywords="domain adaptation", keywords="fatigue detection", keywords="typing patterns", keywords="circadian cycles", keywords="mental fatigue", keywords="psychomotor patterns", keywords="monitoring", keywords="mental health", keywords="keystroke dynamics", abstract="Background: Mental fatigue is a common and potentially debilitating state that can affect individuals' health and quality of life. In some cases, its manifestation can precede or mask early signs of other serious mental or physiological conditions. Detecting and assessing mental fatigue can be challenging nowadays as it relies on self-evaluation and rating questionnaires, which are highly influenced by subjective bias. Introducing more objective, quantitative, and sensitive methods to characterize mental fatigue could be critical to improve its management and the understanding of its connection to other clinical conditions. Objective: This paper aimed to study the feasibility of using keystroke biometrics for mental fatigue detection during natural typing. As typing involves multiple motor and cognitive processes that are affected by mental fatigue, our hypothesis was that the information captured in keystroke dynamics can offer an interesting mean to characterize users' mental fatigue in a real-world setting. 
Methods: We apply domain transformation techniques to adapt and transform TypeNet, a state-of-the-art deep neural network, originally intended for user authentication, to generate a network optimized for the fatigue detection task. All experiments were conducted using 3 keystroke databases that comprise different contexts and data collection protocols. Results: Our preliminary results showed area under the curve performances ranging between 72.2\% and 80\% for fatigue versus rested sample classification, which is aligned with previously published models on daily alertness and circadian cycles. This demonstrates the potential of our proposed system to characterize mental fatigue fluctuations via natural typing patterns. Finally, we studied the performance of an active detection approach that leverages the continuous nature of keystroke biometric patterns for the assessment of users' fatigue in real time. Conclusions: Our results suggest that the psychomotor patterns that characterize mental fatigue manifest during natural typing, which can be quantified via automated analysis of users' daily interaction with their device. These findings represent a step towards the development of a more objective, accessible, and transparent solution to monitor mental fatigue in a real-world environment. 
", doi="10.2196/41003", url="https://biomedeng.jmir.org/2022/2/e41003", url="http://www.ncbi.nlm.nih.gov/pubmed/38875698" } @Article{info:doi/10.2196/41456, author="Shubina, Ivanna", title="Scientific Publication Patterns of Systematic Reviews on Psychosocial Interventions Improving Well-being: Bibliometric Analysis", journal="Interact J Med Res", year="2022", month="Nov", day="11", volume="11", number="2", pages="e41456", keywords="psychosocial intervention", keywords="well-being", keywords="systematic review", keywords="bibliometric analysis", keywords="bibliometrics", keywords="scientific research", keywords="medical research", keywords="publication", keywords="publish", keywords="citation", keywords="scientometrics", keywords="mental health", abstract="Background: Despite numerous empirical studies and systematic reviews conducted on the effectiveness of interventions improving psychological well-being, there is no holistic overview of published systematic reviews in this field. Objective: This bibliometric study explored the scientific patterns of the effectiveness of different psychosocial interventions improving well-being among various categories of individuals with mental and physical diseases, to synthesize well-being intervention studies, and to suggest gaps and further studies in this emerging field. Methods: The bibliometric analysis included identifying the most productive authors, institutions, and countries; most explored fields and subjects of study; most active journals and publishers; and performing citation analysis and analyzing publication trends between 2014 and 2022. We focused on data retrieved from known databases, and the study was conducted with a proven bibliometric approach. Results: In total, 156 studies were found concerning the research domains and retrieved using LENS software from high-ranking databases (Crossref, Microsoft Academic, PubMed, and Core). 
These papers were written in English by 100 authors from 24 countries, among which, the leading country was the United Kingdom. Descriptive characteristics of the publications involved an increased number of publications in 2017 (n=35) and 2019 (n=34) and a decreased number in 2021 (n=4). The top 2 leading authors by citation score are James Thomas (3 papers and 260 citations) and Chris Dickens (3 papers and 182 citations). However, the most cited study had 592 citations. BMJ Open (n=6 articles) is the leading journal in the field of medicine; Clinical Psychology Review (n=5), in psychology; and Frontiers in Psychology, in psychological intervention (n=5) and psychology (n=5). The top 2 publishers were Wiley (n=28) and Elsevier (n=25). Conclusions: This study indicates an overall interest in the declared domains within the last decade. Our findings primarily indicate that psychosocial interventions (PIs) were evaluated as being effective in managing mental and physical problems and enhancing well-being. Cognitive behavioral therapy was assessed as being effective in treating anxiety, psychoeducation in relapse prevention, and gratitude interventions in improving overall health, and the mindfulness approach had a positive impact on decreasing distress and depression. Moreover, all these intervention types resulted in an overall increase in an individual's well-being and resilience. Integrating social and cultural factors while considering individual differences increases the efficiency of PIs. Furthermore, PIs were evaluated as being effective in managing symptoms of eating disorders, dementia, and cancer. Our findings could help provide researchers an overview of the publication trends on research domains of focus for further studies, since it shows current findings and potential research needs in these fields, and would also benefit practitioners working on increasing their own and their patients' well-being. 
", doi="10.2196/41456", url="https://www.i-jmr.org/2022/2/e41456", url="http://www.ncbi.nlm.nih.gov/pubmed/36367767" } @Article{info:doi/10.2196/38168, author="Malins, Sam and Figueredo, Grazziela and Jilani, Tahseen and Long, Yunfei and Andrews, Jacob and Rawsthorne, Mat and Manolescu, Cosmin and Clos, Jeremie and Higton, Fred and Waldram, David and Hunt, Daniel and Perez Vallejos, Elvira and Moghaddam, Nima", title="Developing an Automated Assessment of In-session Patient Activation for Psychological Therapy: Codevelopment Approach", journal="JMIR Med Inform", year="2022", month="Nov", day="8", volume="10", number="11", pages="e38168", keywords="responsible artificial intelligence", keywords="machine learning", keywords="cognitive behavioral therapy", keywords="multimorbidity", keywords="natural language processing", keywords="mental health", abstract="Background: Patient activation is defined as a patient's confidence and perceived ability to manage their own health. Patient activation has been a consistent predictor of long-term health and care costs, particularly for people with multiple long-term health conditions. However, there is currently no means of measuring patient activation from what is said in health care consultations. This may be particularly important for psychological therapy because most current methods for evaluating therapy content cannot be used routinely due to time and cost restraints. Natural language processing (NLP) has been used increasingly to classify and evaluate the contents of psychological therapy. This aims to make the routine, systematic evaluation of psychological therapy contents more accessible in terms of time and cost restraints. However, comparatively little attention has been paid to algorithmic trust and interpretability, with few studies in the field involving end users or stakeholders in algorithm development. 
Objective: This study applied a responsible design to use NLP in the development of an artificial intelligence model to automate the ratings assigned by a psychological therapy process measure: the consultation interactions coding scheme (CICS). The CICS assesses the level of patient activation observable from turn-by-turn psychological therapy interactions. Methods: With consent, 128 sessions of remotely delivered cognitive behavioral therapy from 53 participants experiencing multiple physical and mental health problems were anonymously transcribed and rated by trained human CICS coders. Using participatory methodology, a multidisciplinary team proposed candidate language features that they thought would discriminate between high and low patient activation. The team included service-user researchers, psychological therapists, applied linguists, digital research experts, artificial intelligence ethics researchers, and NLP researchers. Identified language features were extracted from the transcripts alongside demographic features, and machine learning was applied using k-nearest neighbors and bagged trees algorithms to assess whether in-session patient activation and interaction types could be accurately classified. Results: The k-nearest neighbors classifier obtained 73\% accuracy (82\% precision and 80\% recall) in a test data set. The bagged trees classifier obtained 81\% accuracy for test data (87\% precision and 75\% recall) in differentiating between interactions rated high in patient activation and those rated low or neutral. Conclusions: Coproduced language features identified through a multidisciplinary collaboration can be used to discriminate among psychological therapy session contents based on patient activation among patients experiencing multiple long-term physical and mental health conditions. 
", doi="10.2196/38168", url="https://medinform.jmir.org/2022/11/e38168", pmid="36346654" } @Article{info:doi/10.2196/37614, author="Lam, Ka-Hoo and Twose, James and Lissenberg-Witte, Birgit and Licitra, Giovanni and Meijer, Kim and Uitdehaag, Bernard and De Groot, Vincent and Killestein, Joep", title="The Use of Smartphone Keystroke Dynamics to Passively Monitor Upper Limb and Cognitive Function in Multiple Sclerosis: Longitudinal Analysis", journal="J Med Internet Res", year="2022", month=nov, day="7", volume="24", number="11", pages="e37614", keywords="multiple sclerosis, smartphone, mobile app, digital technology, keystroke dynamics, typing, upper extremity, cognition, outpatient monitoring", abstract="Background: Typing on smartphones, which has become a near daily activity, requires both upper limb and cognitive function. Analysis of keyboard interactions during regular typing, that is, keystroke dynamics, could therefore potentially be utilized for passive and continuous monitoring of function in patients with multiple sclerosis. Objective: To determine whether passively acquired smartphone keystroke dynamics correspond to multiple sclerosis outcomes, we investigated the association between keystroke dynamics and clinical outcomes (upper limb and cognitive function). This association was investigated longitudinally in order to study within-patient changes independently of between-patient differences. Methods: During a 1-year follow-up, arm function and information processing speed were assessed every 3 months in 102 patients with multiple sclerosis with the Nine-Hole Peg Test and Symbol Digit Modalities Test, respectively. Keystroke-dynamics data were continuously obtained from regular typing on the participants' own smartphones. 
Press-and-release latency of the alphanumeric keys constituted the fine motor score cluster, while latency of the punctuation and backspace keys constituted the cognition score cluster. The association over time between keystroke clusters and the corresponding clinical outcomes was assessed with linear mixed models with subjects as random intercepts. By centering around the mean and calculating deviation scores within subjects, between-subject and within-subject effects were distinguished. Results: Mean (SD) scores for the fine motor score cluster and cognition score cluster were 0.43 (0.16) and 0.94 (0.41) seconds, respectively. The fine motor score cluster was significantly associated with the Nine-Hole Peg Test: between-subject $\beta$ was 15.9 (95\% CI 12.2-19.6) and within-subject $\beta$ was 6.9 (95\% CI 2.0-11.9). The cognition score cluster was significantly associated with the Symbol Digit Modalities Test between subjects (between-subject $\beta$ --11.2, 95\% CI --17.3 to --5.2) but not within subjects (within-subject $\beta$ --0.4, 95\% CI --5.6 to 4.9). Conclusions: Smartphone keystroke dynamics were longitudinally associated with multiple sclerosis outcomes. Worse arm function corresponded with longer latency in typing both across and within patients. Worse processing speed corresponded with higher latency in using punctuation and backspace keys across subjects. Hence, keystroke dynamics are a potential digital biomarker for remote monitoring and predicting clinical outcomes in patients with multiple sclerosis. 
Trial Registration: Netherlands Trial Register NTR7268; https://trialsearch.who.int/Trial2.aspx?TrialID=NTR7268 ", doi="10.2196/37614", url="https://www.jmir.org/2022/11/e37614", pmid="36342763" } @Article{info:doi/10.2196/38624, author="Li, Ran and Geng, Jiawei and Yang, Runze and Ge, Yumeng and Hesketh, Therese", title="Effectiveness of Computerized Cognitive Training in Delaying Cognitive Function Decline in People With Mild Cognitive Impairment: Systematic Review and Meta-analysis", journal="J Med Internet Res", year="2022", month=oct, day="27", volume="24", number="10", pages="e38624", keywords="computerized cognitive training, mild cognitive impairment", abstract="Background: With no current cure for mild cognitive impairment (MCI), delaying its progression could significantly reduce the disease burden and improve the quality of life for patients with MCI. Computerized cognitive training (CCT) has recently become a potential instrument for improvement of cognition. However, the evidence for its effectiveness remains limited. Objective: This systematic review aims to (1) analyze the efficacy of CCT on cognitive impairment or cognitive decline in patients with MCI and (2) analyze the relationship between the characteristics of CCT interventions and cognition-related health outcomes. Methods: A systematic search was performed using MEDLINE, Cochrane, Embase, Web of Science, and Google Scholar. Full texts of randomized controlled trials of CCT interventions in adults with MCI and published in English language journals between 2010 and 2021 were included. Overall global cognitive function and domain-specific cognition were pooled using a random-effects model. Sensitivity analyses were performed to determine the reasons for heterogeneity and to test the robustness of the results. 
Subgroup analyses were performed to identify the relationship between the characteristics of CCT interventions and cognition-related effectiveness. Results: A total of 18 studies with 1059 participants were included in this review. According to the meta-analysis, CCT intervention provided a significant but small increase in global cognitive function compared to that in the global cognitive function of the control groups (standardized mean difference=0.54, 95\% CI 0.35-0.73; $I^2$=38\%). CCT intervention also resulted in a marginal improvement in domain-specific cognition compared to that in the control groups, with moderate heterogeneity. Subgroup analyses showed consistent improvement in global cognitive behavior in the CCT intervention groups. Conclusions: This systematic review suggests that CCT interventions could improve global cognitive function in patients with MCI. Considering the relatively small sample size and the short treatment duration in all the included studies, more comprehensive trials are needed to quantify both the impact of CCT on cognitive decline, especially in the longer term, and to establish whether CCT should be recommended for use in clinical practice. Trial Registration: PROSPERO International Prospective Register of Systematic Reviews CRD42021278884; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=278884 ", doi="10.2196/38624", url="https://www.jmir.org/2022/10/e38624", pmid="36301590" } @Article{info:doi/10.2196/33871, author="McIntyre, Roger S. and Lipsitz, Orly and Rodrigues, Nelson B. and Subramaniapillai, Mehala and Nasri, Flora and Lee, Yena and Fehnert, Ben and King, James and Chrones, Lambros and Kratiuk, Kevin and Uddin, Sharif and Rosenblat, Joshua D. and 
Mansur, Rodrigo B. and McCue, Maggie", title="An App-Based Digit Symbol Substitution Test for Assessment of Cognitive Deficits in Adults With Major Depressive Disorder: Evaluation Study", journal="JMIR Ment Health", year="2022", month=oct, day="27", volume="9", number="10", pages="e33871", keywords="depression, DSST, Digit Symbol Substitution Test, smartphone, technology, measurement-based care, cognition", abstract="Background: Cognitive dysfunction is an impairing core symptom of depression. Among adults with major depressive disorder (MDD) treated with antidepressants, residual cognitive symptoms interfere with patient-reported outcomes. The foregoing characterization of cognitive symptoms provides the rationale for screening and assessing the severity of cognitive symptoms at point of care. However, clinical neurocognitive assessments are time-consuming and difficult, and they require specialist expertise to interpret them. A smartphone-delivered neurocognitive test may offer an effective and accessible tool that can be readily implemented into a measurement-based care framework. Objective: We aimed to evaluate the use of a smartphone-delivered app-based version of the established Cognition Kit Digit Symbol Substitution Test (DSST) neurocognitive assessment compared to a traditional paper-and-pencil version. Methods: Convergent validity and test-retest reliability of the 2 versions were evaluated. Patient satisfaction with the app was also assessed. Results: Assessments made using the app-based Cognition Kit DSST were highly correlated with the standard paper-and-pencil version of the test, both at the baseline visit (r=0.69, df=27; P$<$.001) and at the end-of-study visit (r=0.82, df=27; P$<$.001), and they were positively evaluated by 30 patients as being user-friendly, easy to navigate, and preferable over the paper-and-pencil version of the DSST. 
However, although the app-based Cognition Kit DSST was validated in patients with MDD, it still needs to be evaluated in healthy controls. Conclusions: App-based DSST may facilitate a more personalized, convenient, and cost-effective method of cognitive assessment, helping to guide measurement-based care and psychotherapeutic and pharmacologic treatment options for patients with MDD. Trial Registration: ClinicalTrials.gov NCT03999567; https://tinyurl.com/2p8pnyv7 ", doi="10.2196/33871", url="https://mental.jmir.org/2022/10/e33871", pmid="36301615" } @Article{info:doi/10.2196/39513, author="Christova, Monica and Strohmaier, Robert and Fuchs-Neuhold, Bianca and Guggenberger, Bernhard and Loder-Fink, Brigitte and Draxler, Theresa and Palli, Christoph and Simi, Helmut and Schadenbauer, Sandra and Nischelwitzer, Alexander and Sprung, Gerhard and Pilz, Ren{\'e} and Darkow, Robert and Staubmann, Wolfgang", title="Mixed Reality Prototype of Multimodal Screening for Early Detection of Cognitive Impairments in Older Adults: Protocol Development and Usability Study", journal="JMIR Res Protoc", year="2022", month=oct, day="14", volume="11", number="10", pages="e39513", keywords="augmented reality, virtual reality, multimodal screening, cognitive impairment, smart cognition, elderly, usability, dementia, aging, screening tool, digital health, digital health intervention", abstract="Background: The early diagnosis of cognitive impairments is an important step in the adequate management of dementia. The project ``Smart Cognition \& Behaviour Screening powered by Augmented Reality'' (SCOBES-AR) aims to develop a multimodal screening tool (MST) for the early detection of cognitive impairments using augmented and virtual reality. 
The first project phase selected validated assessments for combination with the MST and tested it in 300 healthy older adults. Objective: This study established a protocol for the implementation and usability of a mixed reality (MR)--enhanced multidisciplinary screening tool for the early detection of cognitive impairments in older adults. The developed MST will be partially enhanced by MR, which is a combination of augmented reality (AR) and virtual reality (VR). This MR-enhanced prototype of the screening tool (MR-MST) will be tested and compared to the previously developed MST. The usability of the prototype will also be examined. Methods: This single-center observational crossover design study screens 100 healthy participants (aged 60-75 years) for cognitive decline using a specially developed MST (assessment of cognitive functions, olfactory sensitivity, nutritional preferences, gait parameters, reaction times, and activities of daily living) and an MR-enhanced MST in which the assessments of cognitive functions, reaction time, activities of daily living, and gait will be performed using tailor-made software and AR and VR hardware. The results of the MR-enhanced MST will be compared to those without MR. The usability of the developed MR-enhanced MST will be tested on 10 investigators and 10 test participants using observed summative evaluation and the codiscovery method, and on 2 usability experts using the codiscovery and cognitive walkthrough methods. Results: This study was funded by the Austrian Research Promotion Agency (grant 866873) and received approval from the ethics committee of the Medical University of Graz. The MR-MST and the experimental protocol for this study were developed. All participants gave written informed consent. As of July 15, 2022, a total of 70 participants have been screened. Data analysis and dissemination are scheduled for completion by September 2023. 
Conclusions: The development and testing of the MR-MST is an important step toward the establishment of the best practice procedure for the implementation of AR and VR in the screening of cognitive declines in older adults. It will help improve our knowledge of the usability and applicability of the developed prototype and promote further advancement in AR and VR technologies to be used in therapeutic settings. International Registered Report Identifier (IRRID): DERR1-10.2196/39513 ", doi="10.2196/39513", url="https://www.researchprotocols.org/2022/10/e39513", pmid="36239994" } @Article{info:doi/10.2196/33460, author="Soroski, Thomas and da Cunha Vasco, Thiago and Newton-Mason, Sally and Granby, Saffrin and Lewis, Caitlin and Harisinghani, Anuj and Rizzo, Matteo and Conati, Cristina and Murray, Gabriel and Carenini, Giuseppe and Field, Thalia S. and Jang, Hyeju", title="Evaluating Web-Based Automatic Transcription for {Alzheimer} Speech Data: Transcript Comparison and Machine Learning Analysis", journal="JMIR Aging", year="2022", month=sep, day="21", volume="5", number="3", pages="e33460", keywords="Alzheimer disease, mild cognitive impairment, speech, natural language processing, speech recognition software, machine learning, neurodegenerative disease, transcription software, memory", abstract="Background: Speech data for medical research can be collected noninvasively and in large volumes. Speech analysis has shown promise in diagnosing neurodegenerative disease. To effectively leverage speech data, transcription is important, as there is valuable information contained in lexical content. Manual transcription, while highly accurate, limits the potential scalability and cost savings associated with language-based screening. 
Objective: To better understand the use of automatic transcription for classification of neurodegenerative disease, namely, Alzheimer disease (AD), mild cognitive impairment (MCI), or subjective memory complaints (SMC) versus healthy controls, we compared automatically generated transcripts against transcripts that went through manual correction. Methods: We recruited individuals from a memory clinic (``patients'') with a diagnosis of mild-to-moderate AD (n=44, 30\%), MCI (n=20, 13\%), SMC (n=8, 5\%), as well as healthy controls (n=77, 52\%) living in the community. Participants were asked to describe a standardized picture, read a paragraph, and recall a pleasant life experience. We compared transcripts generated using Google speech-to-text software to manually verified transcripts by examining transcription confidence scores, transcription error rates, and machine learning classification accuracy. For the classification tasks, logistic regression, Gaussian naive Bayes, and random forests were used. Results: The transcription software showed higher confidence scores (P$<$.001) and lower error rates (P$>$.05) for speech from healthy controls compared with patients. Classification models using human-verified transcripts significantly (P$<$.001) outperformed automatically generated transcript models for both spontaneous speech tasks. This comparison showed no difference in the reading task. Manually adding pauses to transcripts had no impact on classification performance. However, manually correcting both spontaneous speech tasks led to significantly higher performances in the machine learning models. Conclusions: We found that automatically transcribed speech data could be used to distinguish patients with a diagnosis of AD, MCI, or SMC from controls. We recommend a human verification step to improve the performance of automatic transcripts, especially for spontaneous tasks. 
Moreover, human verification can focus on correcting errors and adding punctuation to transcripts. However, manual addition of pauses is not needed, which can simplify the human verification step to more efficiently process large volumes of speech data. ", doi="10.2196/33460", url="https://aging.jmir.org/2022/3/e33460", pmid="36129754" } @Article{info:doi/10.2196/34450, author="Marin, Anna and DeCaro, Ren{\'e}e and Schiloski, Kylie and Elshaar, Ala'a and Dwyer, Brigid and Vives-Rodriguez, Ana and Palumbo, Rocco and Turk, Katherine and Budson, Andrew", title="Home-Based Electronic Cognitive Therapy in Patients With {Alzheimer} Disease: Feasibility Randomized Controlled Trial", journal="JMIR Form Res", year="2022", month=sep, day="12", volume="6", number="9", pages="e34450", keywords="cognitive training, Alzheimer disease dementia, technology", abstract="Background: Can home-based computerized cognitive training programs be a useful tool to sustain cognition and quality of life in patients with Alzheimer disease (AD)? To date, the progressive nature of the disease has made this question difficult to answer. Computerized platforms provide more accessibility to cognitive trainings; however, the feasibility of long-term, home-based computerized programs for patients with AD dementia remains unclear. Objective: We aimed to investigate the feasibility of a 24-week home-based intervention program using the Constant Therapy app and its preliminary efficacy on cognition in patients with AD. Constant Therapy is a program developed for patients with speech and cognitive deficits. We hypothesized that patients with AD would use Constant Therapy daily over the course of the 24-week period. Methods: Data were collected over a 48-week period. We recruited participants aged between 50 and 90 years with a diagnosis of mild cognitive impairment due to AD or mild AD dementia. 
Participants were randomly assigned to either the Constant Therapy (n=10) or active control (n=9) group. The Constant Therapy group completed a tablet-based training during the first 24 weeks; the second 24 weeks of computerized training were optional. The active control group completed paper-and-pencil games during the first 24 weeks and were invited to complete an optional Constant Therapy training during the second 24 weeks. Every 6 weeks, the participants completed the Repeatable Battery for the Assessment of Neuropsychological Status (RBANS). The participants independently accessed Constant Therapy using an Apple iPad. Our primary feasibility outcomes were the rate of adherence and daily use of Constant Therapy over 24 weeks. Our secondary outcomes were Constant Therapy performance over 24 weeks and change in RBANS scores between the 2 experimental groups. Results: Feasibility analyses were computed for participants who completed 24 weeks of Constant Therapy. We found that long-term use of the Constant Therapy program was feasible in patients with AD over 24 weeks (adherence 80\%; program use 121/168 days, for 32 minutes daily). These participants showed an overall improvement in accuracy and latency (P=.005) in the Constant Therapy scores, as well as specific improvements in visual and auditory memory, attention, and arithmetic tasks. The Constant Therapy group showed improvement in the RBANS coding subtest. No unexpected problems or adverse events were observed. Conclusions: Long-term (eg, 24 weeks) computerized cognitive training using Constant Therapy is feasible in patients with AD in the mild cognitive impairment and mild dementia stages. Patients adhered more to Constant Therapy than to the paper-and-pencil training over 24 weeks and improved their performance over time. These findings support the development of future randomized controlled trials that will investigate the efficacy of Constant Therapy to sustain cognitive function in patients with AD. 
Trial Registration: ClinicalTrials.gov NCT02521558; https://clinicaltrials.gov/ct2/show/NCT02521558 ", doi="10.2196/34450", url="https://formative.jmir.org/2022/9/e34450", pmid="36094804" } @Article{info:doi/10.2196/40384, author="Noori, Ayush and Magdamo, Colin and Liu, Xiao and Tyagi, Tanish and Li, Zhaozhi and Kondepudi, Akhil and Alabsi, Haitham and Rudmann, Emily and Wilcox, Douglas and Brenner, Laura and Robbins, Gregory K. and Moura, Lidia and Zafar, Sahar and Benson, Nicole M. and Hsu, John and Dickson, John R. and Serrano-Pozo, Alberto and Hyman, Bradley T. and Blacker, Deborah and Westover, M. Brandon and Mukerji, Shibani S. and Das, Sudeshna", title="Development and Evaluation of a Natural Language Processing Annotation Tool to Facilitate Phenotyping of Cognitive Status in Electronic Health Records: Diagnostic Study", journal="J Med Internet Res", year="2022", month=aug, day="30", volume="24", number="8", pages="e40384", keywords="chart review, cognition, cognitive status, dementia, diagnostic, electronic health record, health care, natural language processing, research cohort", abstract="Background: Electronic health records (EHRs) with large sample sizes and rich information offer great potential for dementia research, but current methods of phenotyping cognitive status are not scalable. Objective: The aim of this study was to evaluate whether natural language processing (NLP)--powered semiautomated annotation can improve the speed and interrater reliability of chart reviews for phenotyping cognitive status. Methods: In this diagnostic study, we developed and evaluated a semiautomated NLP-powered annotation tool (NAT) to facilitate phenotyping of cognitive status. Clinical experts adjudicated the cognitive status of 627 patients at Mass General Brigham (MGB) health care, using NAT or traditional chart reviews. 
Patient charts contained EHR data from two data sets: (1) records from January 1, 2017, to December 31, 2018, for 100 Medicare beneficiaries from the MGB Accountable Care Organization and (2) records from 2 years prior to COVID-19 diagnosis to the date of COVID-19 diagnosis for 527 MGB patients. All EHR data from the relevant period were extracted; diagnosis codes, medications, and laboratory test values were processed and summarized; clinical notes were processed through an NLP pipeline; and a web tool was developed to present an integrated view of all data. Cognitive status was rated as cognitively normal, cognitively impaired, or undetermined. Assessment time and interrater agreement of NAT compared to manual chart reviews for cognitive status phenotyping was evaluated. Results: NAT adjudication provided higher interrater agreement (Cohen $\kappa$=0.89 vs $\kappa$=0.80) and significant speed up (time difference mean 1.4, SD 1.3 minutes; P$<$.001; ratio median 2.2, min-max 0.4-20) over manual chart reviews. There was moderate agreement with manual chart reviews (Cohen $\kappa$=0.67). In the cases that exhibited disagreement with manual chart reviews, NAT adjudication was able to produce assessments that had broader clinical consensus due to its integrated view of highlighted relevant information and semiautomated NLP features. Conclusions: NAT adjudication improves the speed and interrater reliability for phenotyping cognitive status compared to manual chart reviews. This study underscores the potential of an NLP-based clinically adjudicated method to build large-scale dementia research cohorts from EHRs. 
", doi="10.2196/40384", url="https://www.jmir.org/2022/8/e40384", pmid="36040790" } @Article{info:doi/10.2196/37061, author="Tonn, Peter and Seule, Lea and Degani, Yoav and Herzinger, Shani and Klein, Amit and Schulze, Nina", title="Digital Content-Free Speech Analysis Tool to Measure Affective Distress in Mental Health: Evaluation Study", journal="JMIR Form Res", year="2022", month=aug, day="30", volume="6", number="8", pages="e37061", keywords="mobile health, mHealth, depression, assessment, voice analysis, evaluation, speech, speech analysis, tool, distress, mental health, mood, diagnosis, measurement, questionnaire, mobile phone", abstract="Background: Mood disorders and depression are pervasive and significant problems worldwide. These represent severe health and emotional impairments for individuals and a considerable economic and social burden. Therefore, fast and reliable diagnosis and appropriate treatment are of great importance. Verbal communication can clarify the speaker's mental state---regardless of the content, via speech melody, intonation, and so on. In both everyday life and clinical conditions, a listener with appropriate previous knowledge or a trained specialist can grasp helpful knowledge about the speaker's psychological state. Using automated speech analysis for the assessment and tracking of patients with mental health issues opens up the possibility of remote, automatic, and ongoing evaluation when used with patients' smartphones, as part of the current trends toward the increasing use of digital and mobile health tools. 
Objective: The primary aim of this study is to evaluate the measurements of the presence or absence of depressive mood in participants by comparing the analysis of noncontentual speech parameters with the results of the Patient Health Questionnaire-9. Methods: This proof-of-concept study included participants in different affective phases (with and without depression). The inclusion criteria included a neurological or psychiatric diagnosis made by a specialist and fluent use of the German language. The measuring instrument was the VoiceSense digital voice analysis tool, which enables the analysis of 200 specific speech parameters based on machine learning and the assessment of the findings using Patient Health Questionnaire-9. Results: A total of 292 psychiatric and voice assessments were performed with 163 participants (males: n=47, 28.8\%) aged 15 to 82 years. Of the 163 participants, 87 (53.3\%) were not depressed at the time of assessment, and 88 (53.9\%) participants had clinically mild to moderate depressive phases. Of the 163 participants, 98 (32.5\%) showed subsyndromal symptoms, and 19 (11.7\%) participants were severely depressed. In the speech analysis, a clear differentiation between the individual depressive levels, as seen in the Patient Health Questionnaire-9, was also shown, especially the clear differentiation between nondepressed and depressed participants. The study showed a Pearson correlation of 0.41 between clinical assessment and noncontentual speech analysis (P$<$.001). Conclusions: The use of speech analysis shows a high level of accuracy, not only in terms of the general recognition of a clinically relevant depressive state in the participants. Instead, there is a high degree of agreement regarding the extent of depressive impairment with the assessment of experienced clinical practitioners. 
From our point of view, the application of the noncontentual analysis system in everyday clinical practice makes sense, especially with the idea of a quick and unproblematic assessment of the state of mind, which can even be carried out without personal contact. Trial Registration: ClinicalTrials.gov NCT03700008; https://clinicaltrials.gov/ct2/show/NCT03700008 ", doi="10.2196/37061", url="https://formative.jmir.org/2022/8/e37061", pmid="36040767" } @Article{info:doi/10.2196/34705, author="Metzler, Hannah and Baginski, Hubert and Niederkrotenthaler, Thomas and Garcia, David", title="Detecting Potentially Harmful and Protective Suicide-Related Content on {Twitter}: Machine Learning Approach", journal="J Med Internet Res", year="2022", month=aug, day="17", volume="24", number="8", pages="e34705", keywords="suicide prevention, Twitter, social media, machine learning, deep learning", abstract="Background: Research has repeatedly shown that exposure to suicide-related news media content is associated with suicide rates, with some content characteristics likely having harmful and others potentially protective effects. Although good evidence exists for a few selected characteristics, systematic and large-scale investigations are lacking. Moreover, the growing importance of social media, particularly among young adults, calls for studies on the effects of the content posted on these platforms. Objective: This study applies natural language processing and machine learning methods to classify large quantities of social media data according to characteristics identified as potentially harmful or beneficial in media effects research on suicide and prevention. Methods: We manually labeled 3202 English tweets using a novel annotation scheme that classifies suicide-related tweets into 12 categories. 
Based on these categories, we trained a benchmark of machine learning models for a multiclass and a binary classification task. As models, we included a majority classifier, an approach based on word frequency (term frequency-inverse document frequency with a linear support vector machine) and 2 state-of-the-art deep learning models (Bidirectional Encoder Representations from Transformers [BERT] and XLNet). The first task classified posts into 6 main content categories, which are particularly relevant for suicide prevention based on previous evidence. These included personal stories of either suicidal ideation and attempts or coping and recovery, calls for action intending to spread either problem awareness or prevention-related information, reporting of suicide cases, and other tweets irrelevant to these 5 categories. The second classification task was binary and separated posts in the 11 categories referring to actual suicide from posts in the off-topic category, which use suicide-related terms in another meaning or context. Results: In both tasks, the performance of the 2 deep learning models was very similar and better than that of the majority or the word frequency classifier. BERT and XLNet reached accuracy scores above 73\% on average across the 6 main categories in the test set and F1-scores between 0.69 and 0.85 for all but the suicidal ideation and attempts category (F1=0.55). In the binary classification task, they correctly labeled around 88\% of the tweets as about suicide versus off-topic, with BERT achieving F1-scores of 0.93 and 0.74, respectively. These classification performances were similar to human performance in most cases and were comparable with state-of-the-art models on similar tasks. Conclusions: The achieved performance scores highlight machine learning as a useful tool for media effects research on suicide. 
The clear advantage of BERT and XLNet suggests that there is crucial information about meaning in the context of words beyond mere word frequencies in tweets about suicide. By making data labeling more efficient, this work has enabled large-scale investigations on harmful and protective associations of social media content with suicide rates and help-seeking behavior. ", doi="10.2196/34705", url="https://www.jmir.org/2022/8/e34705", url="http://www.ncbi.nlm.nih.gov/pubmed/35976193" } @Article{info:doi/10.2196/35442, author="Curcic, Jelena and Vallejo, Vanessa and Sorinas, Jennifer and Sverdlov, Oleksandr and Praestgaard, Jens and Piksa, Mateusz and Deurinck, Mark and Erdemli, Gul and B{\"u}gler, Maximilian and Tarnanas, Ioannis and Taptiklis, Nick and Cormack, Francesca and Anker, Rebekka and Mass{\'e}, Fabien and Souillard-Mandar, William and Intrator, Nathan and Molcho, Lior and Madero, Erica and Bott, Nicholas and Chambers, Mieko and Tamory, Josef and Shulz, Matias and Fernandez, Gerardo and Simpson, William and Robin, Jessica and Sn{\ae}dal, G. J{\'o}n and Cha, Jang-Ho and Hannesdottir, Kristin", title="Description of the Method for Evaluating Digital Endpoints in Alzheimer Disease Study: Protocol for an Exploratory, Cross-sectional Study", journal="JMIR Res Protoc", year="2022", month="Aug", day="10", volume="11", number="8", pages="e35442", keywords="digital endpoints", keywords="cognition", keywords="Alzheimer disease", keywords="brain amyloid", keywords="methodology study", keywords="clinical trial design", keywords="mobile phone", abstract="Background: More sensitive and less burdensome efficacy end points are urgently needed to improve the effectiveness of clinical drug development for Alzheimer disease (AD). Although conventional end points lack sensitivity, digital technologies hold promise for amplifying the detection of treatment signals and capturing cognitive anomalies at earlier disease stages. 
Using digital technologies and combining several test modalities allow for the collection of richer information about cognitive and functional status, which is not ascertainable via conventional paper-and-pencil tests. Objective: This study aimed to assess the psychometric properties, operational feasibility, and patient acceptance of 10 promising technologies that are to be used as efficacy end points to measure cognition in future clinical drug trials. Methods: The Method for Evaluating Digital Endpoints in Alzheimer Disease study is an exploratory, cross-sectional, noninterventional study that will evaluate 10 digital technologies' ability to accurately classify participants into 4 cohorts according to the severity of cognitive impairment and dementia. Moreover, this study will assess the psychometric properties of each of the tested digital technologies, including the acceptable range to assess ceiling and floor effects, concurrent validity to correlate digital outcome measures to traditional paper-and-pencil tests in AD, reliability to compare test and retest, and responsiveness to evaluate the sensitivity to change in a mild cognitive challenge model. This study included 50 eligible male and female participants (aged between 60 and 80 years), of whom 13 (26\%) were amyloid-negative, cognitively healthy participants (controls); 12 (24\%) were amyloid-positive, cognitively healthy participants (presymptomatic); 13 (26\%) had mild cognitive impairment (predementia); and 12 (24\%) had mild AD (mild dementia). This study involved 4 in-clinic visits. During the initial visit, all participants completed all conventional paper-and-pencil assessments. During the following 3 visits, the participants underwent a series of novel digital assessments. Results: Participant recruitment and data collection began in June 2020 and continued until June 2021. Hence, the data collection occurred during the COVID-19 pandemic (SARS-CoV-2 virus pandemic). 
Data were successfully collected from all digital technologies to evaluate statistical and operational performance and patient acceptance. This paper reports the baseline demographics and characteristics of the population studied as well as the study's progress during the pandemic. Conclusions: This study was designed to generate feasibility insights and validation data to help advance novel digital technologies in clinical drug development. The learnings from this study will help guide future methods for assessing novel digital technologies and inform clinical drug trials in early AD, aiming to enhance clinical end point strategies with digital technologies. International Registered Report Identifier (IRRID): DERR1-10.2196/35442 ", doi="10.2196/35442", url="https://www.researchprotocols.org/2022/8/e35442", url="http://www.ncbi.nlm.nih.gov/pubmed/35947423" } @Article{info:doi/10.2196/36665, author="Moore, C. Raeanne and Parrish, M. Emma and Van Patten, Ryan and Paolillo, Emily and Filip, F. Tess and Bomyea, Jessica and Lomas, Derek and Twamley, W. Elizabeth and Eyler, T. Lisa and Depp, A. Colin", title="Initial Psychometric Properties of 7 NeuroUX Remote Ecological Momentary Cognitive Tests Among People With Bipolar Disorder: Validation Study", journal="J Med Internet Res", year="2022", month="Jul", day="29", volume="24", number="7", pages="e36665", keywords="neuropsychology", keywords="mobile health", keywords="ambulatory assessment", keywords="ecological momentary assessment", keywords="practice effects", keywords="validity", keywords="testing", keywords="serious mental illness", keywords="mobile phone", abstract="Background: As smartphone technology has become nearly ubiquitous, there is a growing body of literature suggesting that ecological momentary cognitive testing (EMCT) offers advantages over traditional pen-and-paper psychological assessment. 
We introduce a newly developed platform for the self-administration of cognitive tests in ecologically valid ways. Objective: The aim of this study is to develop a Health Insurance Portability and Accountability Act--compliant EMCT smartphone-based platform for the frequent and repeated testing of cognitive abilities in everyday life. This study examines the psychometric properties of 7 mobile cognitive tests covering domains of processing speed, visual working memory, recognition memory, and response inhibition within our platform among persons with and without bipolar disorder (BD). Ultimately, if shown to have adequate psychometric properties, EMCTs may be useful in research on BD and other neurological and psychiatric illnesses. Methods: A total of 45 persons with BD and 21 demographically comparable healthy volunteer participants (aged 18-65 years) completed smartphone-based EMCTs 3 times daily for 14 days. Each EMCT session lasted approximately 1.5 minutes. Only 2 to 3 tests were administered in any given session, no test was administered more than once per day, and alternate test versions were administered in each session. Results: The mean adherence to the EMCT protocol was 69.7\% (SD 20.5\%), resulting in 3965 valid and complete tests across the full sample. Participants were significantly more likely to miss tests on later versus earlier study days. Adherence did not differ by diagnostic status, suggesting that BD does not interfere with EMCT participation. In most tests, age and education were related to EMCT performance in expected directions. The average performances on most EMCTs were moderately to strongly correlated with the National Institutes of Health Toolbox Cognition Battery. Practice effects were observed in 5 tests, with significant differences in practice effects by BD status in 3 tests. 
Conclusions: Although additional reliability and validity data are needed, this study provides initial psychometric support for EMCTs in the assessment of cognitive performance in real-world contexts in BD. ", doi="10.2196/36665", url="https://www.jmir.org/2022/7/e36665", url="http://www.ncbi.nlm.nih.gov/pubmed/35904876" } @Article{info:doi/10.2196/31803, author="Boudard, Mathieu and Alexandre, Jean-Marc and Kervran, Charlotte and Jakubiec, Louise and Shmulewitz, Dvora and Hasin, Deborah and Fournet, Lucie and Rassis, Christophe and Claverie, Patrice and Serre, Fuschia and Auriacombe, Marc", title="Item Response Theory Analyses of Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition (DSM-5) Criteria Adapted to Screen Use Disorder: Exploratory Survey", journal="J Med Internet Res", year="2022", month="Jul", day="27", volume="24", number="7", pages="e31803", keywords="screen media use", keywords="screen addiction", keywords="internet gaming disorder", keywords="screen use disorder", keywords="Item Response Theory", abstract="Background: Screen use is part of daily life worldwide and morbidity related to excess use of screens has been reported. Some use of screens in excess could indicate a screen use disorder (ScUD). An integrative approach to ScUD could better fit the polymodal reality of screens, and concurrent problems with screens, than a split approach, activity by activity. In that paradigm, a pragmatic and operationalized approach to study a potential ScUD requires the use of common criteria, for all screens and activities done on screens, in a single questionnaire. Objective: Our goals were (1) to describe screen uses in a general population sample and (2) to test the unidimensionality, local independence, and psychometric properties of the 9 Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition (DSM-5) internet gaming disorder (IGD) criteria adapted to screen use in a community sample. 
We hypothesized that the 9 DSM-5 IGD criteria adapted to ScUD would show unidimensionality, local independence, and good discrimination, with criteria distributed on the severity continuum. Methods: This cross-sectional survey in a French suburban city targeted adults and adolescents. A self-administered questionnaire covered the main types of screens used and their use for various activities in the past month. Presence of ScUD diagnostic criteria in past 12 months was also self-evaluated in the questionnaire. Factor and 2-parameter Item Response Theory analysis were used to investigate the dimensionality, local independence, and psychometric properties of the ScUD criteria. Results: Among the 300 participants, 171 (57.0\%) were female (mean age 27 years), 297 (99.0\%) used screens, 134 (44.7\%) reported at least one criterion (potential problem users), and 5 (1.7\%) reported 5 or more criteria and endorsed an ScUD. The most endorsed criteria were loss of control (60/300, 20.0\%) and preoccupation (52/300, 17.3\%). Screen types used and screen activities differed between participants with no ScUD criteria and those with at least one ScUD criterion. The latter were more likely to have a computer as the most used screen type, and more video gaming, communication/social network, and watching news and research of information as activities. Unidimensionality was confirmed by all fit indices. Local independence was confirmed by the absence of residual correlation between the items. Criteria had relatively high factor loading, with loss of interest in other recreational activities having the highest. However, criteria with the lowest factor loading all remained above the cut-offs, sanctioning unidimensionality. Most discriminating criteria were loss of interests, preoccupation, deceive/cover up, and risk/lose relationship/opportunities, which also provided the most information on the measurement of the latent trait. 
Conclusions: We described screen uses in a French community sample and have shown that the adaptation of the DSM-5 IGD to ``ScUD'' has good psychometric validity and is discriminating, confirming our hypothesis. We suggest to use those criteria to assess potential ``ScUD.'' Further studies should determine if all criteria are needed and whether others should be added. ", doi="10.2196/31803", url="https://www.jmir.org/2022/7/e31803", url="http://www.ncbi.nlm.nih.gov/pubmed/35896018" } @Article{info:doi/10.2196/35807, author="Mullick, Tahsin and Radovic, Ana and Shaaban, Sam and Doryab, Afsaneh", title="Predicting Depression in Adolescents Using Mobile and Wearable Sensors: Multimodal Machine Learning--Based Exploratory Study", journal="JMIR Form Res", year="2022", month="Jun", day="24", volume="6", number="6", pages="e35807", keywords="adolescent", keywords="depression", keywords="uHealth", keywords="machine learning", keywords="mobile phone", abstract="Background: Depression levels in adolescents have trended upward over the past several years. According to a 2020 survey by the National Survey on Drug Use and Health, 4.1 million US adolescents have experienced at least one major depressive episode. This number constitutes approximately 16\% of adolescents aged 12 to 17 years. However, only 32.3\% of adolescents received some form of specialized or nonspecialized treatment. Identifying worsening symptoms earlier using mobile and wearable sensors may lead to earlier intervention. Most studies on predicting depression using sensor-based data are geared toward the adult population. Very few studies look into predicting depression in adolescents. Objective: The aim of our work was to study passively sensed data from adolescents with depression and investigate the predictive capabilities of 2 machine learning approaches to predict depression scores and change in depression levels in adolescents. 
This work also provided an in-depth analysis of sensor features that serve as key indicators of change in depressive symptoms and the effect of variation of data samples on model accuracy levels. Methods: This study included 55 adolescents with symptoms of depression aged 12 to 17 years. Each participant was passively monitored through smartphone sensors and Fitbit wearable devices for 24 weeks. Passive sensors collected call, conversation, location, and heart rate information daily. Following data preprocessing, 67\% (37/55) of the participants in the aggregated data set were analyzed. Weekly Patient Health Questionnaire-9 surveys answered by participants served as the ground truth. We applied regression-based approaches to predict the Patient Health Questionnaire-9 depression score and change in depression severity. These approaches were consolidated using universal and personalized modeling strategies. The universal strategies consisted of Leave One Participant Out and Leave Week X Out. The personalized strategy models were based on Accumulated Weeks and Leave One Week One User Instance Out. Linear and nonlinear machine learning algorithms were trained to model the data. Results: We observed that personalized approaches performed better on adolescent depression prediction compared with universal approaches. The best models were able to predict depression score and weekly change in depression level with root mean squared errors of 2.83 and 3.21, respectively, following the Accumulated Weeks personalized modeling strategy. Our feature importance investigation showed that the contribution of screen-, call-, and location-based features influenced optimal models and were predictive of adolescent depression. Conclusions: This study provides insight into the feasibility of using passively sensed data for predicting adolescent depression. We demonstrated prediction capabilities in terms of depression score and change in depression level. 
The prediction results revealed that personalized models performed better on adolescents than universal approaches. Feature importance provided a better understanding of depression and sensor data. Our findings can help in the development of advanced adolescent depression predictions. ", doi="10.2196/35807", url="https://formative.jmir.org/2022/6/e35807", url="http://www.ncbi.nlm.nih.gov/pubmed/35749157" } @Article{info:doi/10.2196/33555, author="Klein Schaarsberg, E. Ren{\'e}e and Popma, Arne and Lindauer, L. Ram{\'o}n J. and van Dam, Levi", title="The Effects of a Virtual Reality--Based Training Program for Adolescents With Disruptive Behavior Problems on Cognitive Distortions and Treatment Motivation: Protocol for a Multiple Baseline Single-Case Experimental Design", journal="JMIR Res Protoc", year="2022", month="May", day="20", volume="11", number="5", pages="e33555", keywords="treatment motivation", keywords="cognitive distortions", keywords="reflective functioning", keywords="disruptive behavior problems", keywords="adolescence", keywords="virtual reality", keywords="single-case experimental design", abstract="Background: Serious disruptive behavior among adolescents is a prevalent and often persistent problem. This highlights the importance of adequate and effective treatment to help adolescents with disruptive behavior problems react less hostile and aggressive. In order to create a treatment environment in which behavioral change can be enhanced, treatment motivation plays an essential role. Regarding treatment itself, a focus on challenging self-serving cognitive distortions in order to achieve behavioral change is important. Street Temptations (ST) is a new training program that was developed to address both treatment motivation and cognitive distortions in adolescents with disruptive behavior problems. 
One of the innovative aspects of ST is the use of virtual reality (VR) techniques to provide adolescents during treatment with visually presented daily social scenarios to activate emotional engagement and dysfunctional cognitions. By using the VR scenarios as an integral starting point of ST's sessions and transferring the power of the VR experience into playful and dynamic exercises to practice social perspective--taking, adolescents are encouraged to reflect on both their own behavior and that of others. This focus on reflection is grounded in ST's main treatment mechanism to influence treatment motivation and cognitive distortions, namely, mentalizing (ie, reflective functioning). Objective: The aim of this study is to describe the research protocol to evaluate the effects of ST on treatment motivation and cognitive distortions. We take a closer look at the use of ST and the methodology used, namely, the repeated single-case experimental design (SCED). Methods: The effects of ST are studied through a multiple baseline SCED, using both quantitative and qualitative data. In total, 18 adolescents from secure residential youth care facilities and secondary special education schools are randomly assigned to 1 of the 3 different baseline conditions. Throughout the baseline phase (1, 2, or 3 weeks), intervention phase (4 weeks), and follow-up phase (1, 2, or 3 weeks), daily measurements on treatment motivation and cognitive distortions are conducted. Secondary study parameters are assessed before baseline, after intervention, and after follow-up. Qualitative data are collected after intervention, as well as at 3 months and 6 months after the intervention. Results: Data collection for this study started in November 2021 and is planned to be completed by August 2023. The results will be published in peer-reviewed journals and presented at national and international conferences. Conclusions: ST aims to improve the disruptive behavior problems of adolescents. 
This study will be the first to gain insights into the effectiveness of ST. The strengths of this study include its thorough and individually focused design (SCED), the focus on a residential as well as a secondary special education setting, and the ecological validity. The implications for practice are discussed. Trial Registration: Central Committee on Research Involving Human Subjects NL75545.029.20. Netherlands Trial Register NL9639; https://www.trialregister.nl/trial/9639 International Registered Report Identifier (IRRID): PRR1-10.2196/33555 ", doi="10.2196/33555", url="https://www.researchprotocols.org/2022/5/e33555", url="http://www.ncbi.nlm.nih.gov/pubmed/35594071" } @Article{info:doi/10.2196/34347, author="Liu, Ying and Schneider, Stefan and Orriens, Bart and Meijer, Erik and Darling, E. Jill and Gutsche, Tania and Gatz, Margaret", title="Self-administered Web-Based Tests of Executive Functioning and Perceptual Speed: Measurement Development Study With a Large Probability-Based Survey Panel", journal="J Med Internet Res", year="2022", month="May", day="9", volume="24", number="5", pages="e34347", keywords="cognitive tests", keywords="internet", keywords="probability-based", keywords="web-based", keywords="executive function", keywords="response speed", keywords="self-administered test", keywords="mobile phone", abstract="Background: Cognitive testing in large population surveys is frequently used to describe cognitive aging and determine the incidence rates, risk factors, and long-term trajectories of the development of cognitive impairment. As these surveys are increasingly administered on internet-based platforms, web-based and self-administered cognitive testing calls for close investigation. 
Objective: Web-based, self-administered versions of 2 age-sensitive cognitive tests, the Stop and Go Switching Task for executive functioning and the Figure Identification test for perceptual speed, were developed and administered to adult participants in the Understanding America Study. We examined differences in cognitive test scores across internet device types and the extent to which the scores were associated with self-reported distractions in everyday environments in which the participants took the tests. In addition, national norms were provided for the US population. Methods: Data were collected from a probability-based internet panel representative of the US adult population---the Understanding America Study. Participants with access to both a keyboard- and mouse-based device and a touch screen--based device were asked to complete the cognitive tests twice in a randomized order across device types, whereas participants with access to only 1 type of device were asked to complete the tests twice on the same device. At the end of each test, the participants answered questions about interruptions and potential distractions that occurred during the test. Results: Of the 7410 (Stop and Go) and 7216 (Figure Identification) participants who completed the device ownership survey, 6129 (82.71\% for Stop and Go) and 6717 (93.08\% for Figure Identification) participants completed the first session and correctly responded to at least 70\% of the trials. On average, the standardized differences across device types were small, with the absolute value of Cohen d ranging from 0.05 (for the switch score in Stop and Go and the Figure Identification score) to 0.13 (for the nonswitch score in Stop and Go). Poorer cognitive performance was moderately associated with older age (the absolute value of r ranged from 0.32 to 0.61), and this relationship was comparable across device types (the absolute value of Cohen q ranged from 0.01 to 0.17). 
Approximately 12.72\% (779/6123 for Stop and Go) and 12.32\% (828/6721 for Figure Identification) of participants were interrupted during the test. Interruptions predicted poorer cognitive performance (P<.01 for all scores). Specific distractions (eg, watching television and listening to music) were inconsistently related to cognitive performance. National norms, calculated as weighted average scores using sampling weights, suggested poorer cognitive performance as age increased. Conclusions: Cognitive scores assessed by self-administered web-based tests were sensitive to age differences in cognitive performance and were comparable across the keyboard- and touch screen--based internet devices. Distraction in everyday environments, especially when interrupted during the test, may result in a nontrivial bias in cognitive testing. ", doi="10.2196/34347", url="https://www.jmir.org/2022/5/e34347", url="http://www.ncbi.nlm.nih.gov/pubmed/35532966" } @Article{info:doi/10.2196/35549, author="Braund, A. Taylor and Zin, The May and Boonstra, W. Tjeerd and Wong, J. Quincy J. and Larsen, E. Mark and Christensen, Helen and Tillman, Gabriel and O'Dea, Bridianne", title="Smartphone Sensor Data for Identifying and Monitoring Symptoms of Mood Disorders: A Longitudinal Observational Study", journal="JMIR Ment Health", year="2022", month="May", day="4", volume="9", number="5", pages="e35549", keywords="depression", keywords="bipolar disorder", keywords="sensors", keywords="mobile app", keywords="circadian rhythm", keywords="mobile phone", abstract="Background: Mood disorders are burdensome illnesses that often go undetected and untreated. Sensor technologies within smartphones may provide an opportunity for identifying the early changes in circadian rhythm and social support/connectedness that signify the onset of a depressive or manic episode. 
Objective: Using smartphone sensor data, this study investigated the relationship between circadian rhythm, which was determined by GPS data, and symptoms of mental health among a clinical sample of adults diagnosed with major depressive disorder or bipolar disorder. Methods: A total of 121 participants were recruited from a clinical setting to take part in a 10-week observational study. Self-report questionnaires for mental health outcomes, social support, social connectedness, and quality of life were assessed at 6 time points throughout the study period. Participants consented to passively sharing their smartphone GPS data for the duration of the study. Circadian rhythm (ie, regularity of location changes in a 24-hour rhythm) was extracted from GPS mobility patterns at baseline. Results: Although we found no association between circadian rhythm and mental health functioning at baseline, there was a positive association between circadian rhythm and the size of participants' social support networks at baseline (r=0.22; P=.03; $R^2$=0.049). In participants with bipolar disorder, circadian rhythm was associated with a change in anxiety from baseline; a higher circadian rhythm was associated with an increase in anxiety and a lower circadian rhythm was associated with a decrease in anxiety at time point 5. Conclusions: Circadian rhythm, which was extracted from smartphone GPS data, was associated with social support and predicted changes in anxiety in a clinical sample of adults with mood disorders. Larger studies are required for further validations. However, smartphone sensing may have the potential to monitor early symptoms of mood disorders. 
", doi="10.2196/35549", url="https://mental.jmir.org/2022/5/e35549", url="http://www.ncbi.nlm.nih.gov/pubmed/35507385" } @Article{info:doi/10.2196/35928, author="Harvey, Daisy and Lobban, Fiona and Rayson, Paul and Warner, Aaron and Jones, Steven", title="Natural Language Processing Methods and Bipolar Disorder: Scoping Review", journal="JMIR Ment Health", year="2022", month="Apr", day="22", volume="9", number="4", pages="e35928", keywords="bipolar disorder", keywords="mental health", keywords="mental illness", keywords="natural language processing", keywords="computational linguistics", abstract="Background: Health researchers are increasingly using natural language processing (NLP) to study various mental health conditions using both social media and electronic health records (EHRs). There is currently no published synthesis that relates specifically to the use of NLP methods for bipolar disorder, and this scoping review was conducted to synthesize valuable insights that have been presented in the literature. Objective: This scoping review explored how NLP methods have been used in research to better understand bipolar disorder and identify opportunities for further use of these methods. Methods: A systematic, computerized search of index and free-text terms related to bipolar disorder and NLP was conducted using 5 databases and 1 anthology: MEDLINE, PsycINFO, Academic Search Ultimate, Scopus, Web of Science Core Collection, and the ACL Anthology. Results: Of 507 identified studies, a total of 35 (6.9\%) studies met the inclusion criteria. A narrative synthesis was used to describe the data, and the studies were grouped into four objectives: prediction and classification (n=25), characterization of the language of bipolar disorder (n=13), use of EHRs to measure health outcomes (n=3), and use of EHRs for phenotyping (n=2). Ethical considerations were reported in 60\% (21/35) of the studies. 
Conclusions: The current literature demonstrates how language analysis can be used to assist in and improve the provision of care for people living with bipolar disorder. Individuals with bipolar disorder and the medical community could benefit from research that uses NLP to investigate risk-taking, web-based services, social and occupational functioning, and the representation of gender in bipolar disorder populations on the web. Future research that implements NLP methods to study bipolar disorder should be governed by ethical principles, and any decisions regarding the collection and sharing of data sets should ultimately be made on a case-by-case basis, considering the risk to the data participants and whether their privacy can be ensured. ", doi="10.2196/35928", url="https://mental.jmir.org/2022/4/e35928", url="http://www.ncbi.nlm.nih.gov/pubmed/35451984" } @Article{info:doi/10.2196/34105, author="Newson, Jane Jennifer and Pastukh, Vladyslav and Thiagarajan, C. Tara", title="Assessment of Population Well-being With the Mental Health Quotient: Validation Study", journal="JMIR Ment Health", year="2022", month="Apr", day="20", volume="9", number="4", pages="e34105", keywords="psychiatry", keywords="public health", keywords="methods", keywords="mental health", keywords="population health", keywords="social determinants of health", keywords="global health", keywords="behavioral symptoms", keywords="diagnosis", keywords="symptom assessment", keywords="psychopathology", keywords="mental disorders", keywords="mHealth", keywords="depression", keywords="anxiety", keywords="attention deficit disorder with hyperactivity", keywords="autistic disorder", keywords="internet", abstract="Background: The Mental Health Quotient (MHQ) is an anonymous web-based assessment of mental health and well-being that comprehensively covers symptoms across 10 major psychiatric disorders, as well as positive elements of mental function. 
It uses a novel life impact scale and provides a score to the individual that places them on a spectrum from Distressed to Thriving along with a personal report that offers self-care recommendations. Since April 2020, the MHQ has been freely deployed as part of the Mental Health Million Project. Objective: This paper demonstrates the reliability and validity of the MHQ, including the construct validity of the life impact scale, sample and test-retest reliability of the assessment, and criterion validation of the MHQ with respect to clinical burden and productivity loss. Methods: Data were taken from the Mental Health Million open-access database (N=179,238) and included responses from English-speaking adults (aged $\geq$18 years) from the United States, Canada, the United Kingdom, Ireland, Australia, New Zealand, South Africa, Singapore, India, and Nigeria collected during 2021. To assess sample reliability, random demographically matched samples (each 11,033/179,238, 6.16\%) were compared within the same 6-month period. Test-retest reliability was determined using the subset of individuals who had taken the assessment twice $\geq$3 days apart (1907/179,238, 1.06\%). To assess the construct validity of the life impact scale, additional questions were asked about the frequency and severity of an example symptom (feelings of sadness, distress, or hopelessness; 4247/179,238, 2.37\%). To assess criterion validity, elements rated as having a highly negative life impact by a respondent (equivalent to experiencing the symptom $\geq$5 days a week) were mapped to clinical diagnostic criteria to calculate the clinical burden (174,618/179,238, 97.42\%). In addition, MHQ scores were compared with the number of workdays missed or with reduced productivity in the past month (7625/179,238, 4.25\%). Results: Distinct samples collected during the same period had indistinguishable MHQ distributions and MHQ scores were correlated with r=0.84 between retakes within an 8- to 120-day period. 
Life impact ratings were correlated with frequency and severity of symptoms, with a clear linear relationship (R2>0.99). Furthermore, the aggregate MHQ scores were systematically related to both clinical burden and productivity. At one end of the scale, 89.08\% (8986/10,087) of those in the Distressed category mapped to one or more disorders and had an average productivity loss of 15.2 (SD 11.2; SEM [standard error of measurement] 0.5) days per month. In contrast, at the other end of the scale, 0\% (1/24,365) of those in the Thriving category mapped to any of the 10 disorders and had an average productivity loss of 1.3 (SD 3.6; SEM 0.1) days per month. Conclusions: The MHQ is a valid and reliable assessment of mental health and well-being when delivered anonymously on the web. ",
  doi      = "10.2196/34105",
  url      = "https://mental.jmir.org/2022/4/e34105",
  pmid     = "35442210"
}
@Article{info:doi/10.2196/34513,
  author   = "Yuan, Jing and Au, Rhoda and Karjadi, Cody and Ang, Fang Ting and Devine, Sherral and Auerbach, Sanford and DeCarli, Charles and Libon, J. David and Mez, Jesse and Lin, Honghuang",
  title    = "Associations Between the Digital {Clock Drawing Test} and Brain Volume: Large Community-Based Prospective Cohort ({Framingham Heart Study})",
  journal  = "J Med Internet Res",
  year     = "2022",
  month    = apr,
  day      = "15",
  volume   = "24",
  number   = "4",
  pages    = "e34513",
  keywords = "Clock Drawing Test, digital, neuropsychological test, cognitive, technology, Boston Process Approach, neurology, Framingham Heart Study, dementia, Alzheimer",
  abstract = "Background: The digital Clock Drawing Test (dCDT) has been recently used as a more objective tool to assess cognition. However, the association between digitally obtained clock drawing features and structural neuroimaging measures has not been assessed in large population-based studies. 
Objective: We aimed to investigate the association between dCDT features and brain volume. Methods: This study included participants from the Framingham Heart Study who had both a dCDT and magnetic resonance imaging (MRI) scan, and were free of dementia or stroke. Linear regression models were used to assess the association between 18 dCDT composite scores (derived from 105 dCDT raw features) and brain MRI measures, including total cerebral brain volume (TCBV), cerebral white matter volume, cerebral gray matter volume, hippocampal volume, and white matter hyperintensity (WMH) volume. Classification models were also built from clinical risk factors, dCDT composite scores, and MRI measures to distinguish people with mild cognitive impairment (MCI) from those whose cognition was intact. Results: A total of 1656 participants were included in this study (mean age 61 years, SD 13 years; 50.9\% women), with 23 participants diagnosed with MCI. All dCDT composite scores were associated with TCBV after adjusting for multiple testing (P value <.05/18). Eleven dCDT composite scores were associated with cerebral white matter volume, but only 1 dCDT composite score was associated with cerebral gray matter volume. None of the dCDT composite scores was associated with hippocampal volume or WMH volume. The classification model for differentiating MCI and normal cognition participants, which incorporated age, sex, education, MRI measures, and dCDT composite scores, showed an area under the curve of 0.897. Conclusions: dCDT composite scores were significantly associated with multiple brain MRI measures in a large community-based cohort. The dCDT has the potential to be used as a cognitive assessment tool in the clinical diagnosis of MCI. ",
  doi      = "10.2196/34513",
  url      = "https://www.jmir.org/2022/4/e34513",
  pmid     = "35436225"
}
@Article{info:doi/10.2196/36825, author="Ye, Siao and Sun, Kevin and Huynh, Duong and Phi, Q. 
Huy and Ko, Brian and Huang, Bin and Hosseini Ghomi, Reza",
  title    = "A Computerized Cognitive Test Battery for Detection of Dementia and Mild Cognitive Impairment: Instrument Validation Study",
  journal  = "JMIR Aging",
  year     = "2022",
  month    = apr,
  day      = "15",
  volume   = "5",
  number   = "2",
  pages    = "e36825",
  keywords = "cognitive test, mild cognitive impairment, dementia, cognitive decline, repeatable battery, discriminant analysis",
  abstract = "Background: Early detection of dementia is critical for intervention and care planning but remains difficult. Computerized cognitive testing provides an accessible and promising solution to address these current challenges. Objective: The aim of this study was to evaluate a computerized cognitive testing battery (BrainCheck) for its diagnostic accuracy and ability to distinguish the severity of cognitive impairment. Methods: A total of 99 participants diagnosed with dementia, mild cognitive impairment (MCI), or normal cognition (NC) completed the BrainCheck battery. Statistical analyses compared participant performances on BrainCheck based on their diagnostic group. Results: BrainCheck battery performance showed significant differences between the NC, MCI, and dementia groups, achieving 88\% or higher sensitivity and specificity (ie, true positive and true negative rates) for separating dementia from NC, and 77\% or higher sensitivity and specificity in separating the MCI group from the NC and dementia groups. Three-group classification found true positive rates of 80\% or higher for the NC and dementia groups and true positive rates of 64\% or higher for the MCI group. Conclusions: BrainCheck was able to distinguish between diagnoses of dementia, MCI, and NC, providing a potentially reliable tool for early detection of cognitive impairment. ",
  doi      = "10.2196/36825",
  url      = "https://aging.jmir.org/2022/2/e36825",
  pmid     = "35436212"
}
@Article{info:doi/10.2196/31006,
  author   = "Zhou, Joanne and Lamichhane, Bishal and Ben-Zeev, Dror and Campbell, Andrew and Sano, Akane",
  title    = "Predicting Psychotic Relapse in Schizophrenia With Mobile Sensor Data: Routine Cluster Analysis",
  journal  = "JMIR Mhealth Uhealth",
  year     = "2022",
  month    = apr,
  day      = "11",
  volume   = "10",
  number   = "4",
  pages    = "e31006",
  keywords = "schizophrenia, psychotic relapse, machine learning, clustering, mobile phone, routine, Gaussian mixture models, partition around medoids, dynamic time warping, balanced random forest",
  abstract = "Background: Behavioral representations obtained from mobile sensing data can be helpful for the prediction of an oncoming psychotic relapse in patients with schizophrenia and the delivery of timely interventions to mitigate such relapse. Objective: In this study, we aim to develop clustering models to obtain behavioral representations from continuous multimodal mobile sensing data for relapse prediction tasks. The identified clusters can represent different routine behavioral trends related to daily living of patients and atypical behavioral trends associated with impending relapse. Methods: We used the mobile sensing data obtained from the CrossCheck project for our analysis. Continuous data from six different mobile sensing-based modalities (ambient light, sound, conversation, acceleration, etc) obtained from 63 patients with schizophrenia, each monitored for up to a year, were used for the clustering models and relapse prediction evaluation. Two clustering models, Gaussian mixture model (GMM) and partition around medoids (PAM), were used to obtain behavioral representations from the mobile sensing data. 
These models have different notions of similarity between behaviors as represented by the mobile sensing data, and thus, provide different behavioral characterizations. The features obtained from the clustering models were used to train and evaluate a personalized relapse prediction model using balanced random forest. The personalization was performed by identifying optimal features for a given patient based on a personalization subset consisting of other patients of similar age. Results: The clusters identified using the GMM and PAM models were found to represent different behavioral patterns (such as clusters representing sedentary days, active days but with low communication, etc). Although GMM-based models better characterized routine behaviors by discovering dense clusters with low cluster spread, some other identified clusters had a larger cluster spread, likely indicating heterogeneous behavioral characterizations. On the other hand, PAM model-based clusters had lower variability of cluster spread, indicating more homogeneous behavioral characterization in the obtained clusters. Significant changes near the relapse periods were observed in the obtained behavioral representation features from the clustering models. The clustering model-based features, together with other features characterizing the mobile sensing data, resulted in an F2 score of 0.23 for the relapse prediction task in a leave-one-patient-out evaluation setting. The obtained F2 score was significantly higher than that of a random classification baseline with an average F2 score of 0.042. Conclusions: Mobile sensing can capture behavioral trends using different sensing modalities. Clustering of the daily mobile sensing data may help discover routine and atypical behavioral trends. In this study, we used GMM-based and PAM-based cluster models to obtain behavioral trends in patients with schizophrenia. 
The features derived from the cluster models were found to be predictive for detecting an oncoming psychotic relapse. Such relapse prediction models can be helpful in enabling timely interventions. ",
  doi      = "10.2196/31006",
  url      = "https://mhealth.jmir.org/2022/4/e31006",
  pmid     = "35404256"
}
@Article{info:doi/10.2196/21111,
  author   = "Wiegersma, Sytske and Hidajat, Maurice and Schrieken, Bart and Veldkamp, Bernard and Olff, Miranda",
  title    = "Improving Web-Based Treatment Intake for Multiple Mental and Substance Use Disorders by Text Mining and Machine Learning: Algorithm Development and Validation",
  journal  = "JMIR Ment Health",
  year     = "2022",
  month    = apr,
  day      = "11",
  volume   = "9",
  number   = "4",
  pages    = "e21111",
  keywords = "supervised text classification, multi-class classification, screening, mental health disorders, computerized CBT, automated intake and referral",
  abstract = "Background: Text mining and machine learning are increasingly used in mental health care practice and research, potentially saving time and effort in the diagnosis and monitoring of patients. Previous studies showed that mental disorders can be detected based on text, but they focused on screening for a single predefined disorder instead of multiple disorders simultaneously. Objective: The aim of this study is to develop a Dutch multi-class text-classification model to screen for a range of mental disorders to refer new patients to the most suitable treatment. Methods: On the basis of textual responses of patients (N=5863) to a questionnaire currently used for intake and referral, a 7-class classifier was developed to distinguish among anxiety, panic, posttraumatic stress, mood, eating, substance use, and somatic symptom disorders. A linear support vector machine was fitted using nested cross-validation grid search. Results: The highest classification rate was found for eating disorders (82\%). 
The scores for panic (55\%), posttraumatic stress (52\%), mood (50\%), somatic symptom (50\%), anxiety (35\%), and substance use disorders (33\%) were lower, likely because of overlapping symptoms. The overall classification accuracy (49\%) was reasonable for a 7-class classifier. Conclusions: A classification model was developed that could screen text for multiple mental health disorders. The screener resulted in an additional outcome score that may serve as input for a formal diagnostic interview and referral. This may lead to a more efficient and standardized intake process. ",
  doi      = "10.2196/21111",
  url      = "https://mental.jmir.org/2022/4/e21111",
  pmid     = "35404261"
}
@Article{info:doi/10.2196/32824,
  author   = "Martin-Key, A. Nayra and Spadaro, Benedetta and Funnell, Erin and Barker, Jane Eleanor and Schei, Sofie Thea and Tomasik, Jakub and Bahn, Sabine",
  title    = "The Current State and Validity of Digital Assessment Tools for Psychiatry: Systematic Review",
  journal  = "JMIR Ment Health",
  year     = "2022",
  month    = mar,
  day      = "30",
  volume   = "9",
  number   = "3",
  pages    = "e32824",
  keywords = "diagnostic accuracy, digital mental health, digital questionnaire, psychiatry, systematic review",
  abstract = "Background: Given the role digital technologies are likely to play in the future of mental health care, there is a need for a comprehensive appraisal of the current state and validity (ie, screening or diagnostic accuracy) of digital mental health assessments. Objective: The aim of this review is to explore the current state and validity of question-and-answer--based digital tools for diagnosing and screening psychiatric conditions in adults. Methods: This systematic review was based on the Population, Intervention, Comparison, and Outcome framework and was carried out in accordance with the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines. 
MEDLINE, Embase, Cochrane Library, ASSIA, Web of Science Core Collection, CINAHL, and PsycINFO were systematically searched for articles published between 2005 and 2021. A descriptive evaluation of the study characteristics and digital solutions and a quantitative appraisal of the screening or diagnostic accuracy of the included tools were conducted. Risk of bias and applicability were assessed using the revised tool for the Quality Assessment of Diagnostic Accuracy Studies 2. Results: A total of 28 studies met the inclusion criteria, with the most frequently evaluated conditions encompassing generalized anxiety disorder, major depressive disorder, and any depressive disorder. Most of the studies used digitized versions of existing pen-and-paper questionnaires, with findings revealing poor to excellent screening or diagnostic accuracy (sensitivity=0.32-1.00, specificity=0.37-1.00, area under the receiver operating characteristic curve=0.57-0.98) and a high risk of bias for most of the included studies. Conclusions: The field of digital mental health tools is in its early stages, and high-quality evidence is lacking. International Registered Report Identifier (IRRID): RR2-10.2196/25382 ",
  doi      = "10.2196/32824",
  url      = "https://mental.jmir.org/2022/3/e32824",
  pmid     = "35353053"
}
@Article{info:doi/10.2196/34300,
  author   = "Marcu, Gabriela and Ondersma, J. Steven and Spiller, N. Allison and Broderick, M. Brianna and Kadri, Reema and Buis, R. Lorraine",
  title    = "The Perceived Benefits of Digital Interventions for Behavioral Health: Qualitative Interview Study",
  journal  = "J Med Internet Res",
  year     = "2022",
  month    = mar,
  day      = "30",
  volume   = "24",
  number   = "3",
  pages    = "e34300",
  keywords = "computers, mobile apps, screening, brief interventions, diagnosis, computer-assisted/methods, surveys and questionnaires, motivational interviewing, therapy, implementation, qualitative, mobile phone",
  abstract = "Background: Digital interventions have gained momentum in terms of behavioral health. However, owing to lacking standard approaches or tools for creating digital behavioral interventions, clinical researchers follow widely varying conceptions of how best to go about digital intervention development. Researchers also face significant cost-, time-, and expertise-related challenges in digital intervention development. Improving the availability of tools and guidance for researchers will require a thorough understanding of the motivations and needs of researchers seeking to create digital interventions. Objective: This study aims to understand the perceptions of behavioral researchers toward digital interventions, and inform the use of these interventions, by documenting the reasons why researchers are increasingly focusing their efforts on digital interventions and their perspectives on the perceived benefits that digital approaches can provide for researchers and intervention recipients. Methods: We conducted semistructured qualitative interviews with 18 researchers who had experience designing digital behavioral interventions or running studies with them. A convenience sample of interviewees was recruited from among users of the Computerized Intervention Authoring System platform, a web-based tool that facilitates the process of creating and deploying digital interventions in behavioral research. 
Interviews were conducted over teleconference between February and April 2020. Recordings from the interviews were transcribed and thematically analyzed by multiple coders. Results: Interviews were completed with 18 individuals and lasted between 24 and 65 (mean 46.9, SD 11.3) minutes. Interviewees were predominantly female (17/18, 94\%) and represented different job roles, ranging from researcher to project or study staff. Four major themes came out of the interviews concerning the benefits of digital interventions for behavioral health: convenience and flexibility for interventionists and recipients, support for implementing evidence-based interventions with fidelity, scaling and improving access to interventions, and getting a foot in the door despite stigma and disenfranchisement. Conclusions: Interviewees described a number of important potential benefits of digital interventions, particularly with respect to scientific rigor, scalability, and overcoming barriers to reaching more people. There are complex considerations with regard to translating behavior change strategies into digital forms of delivery, and interventionists make individual, sometimes unexpected, choices with minimal evidence of their relative effectiveness. Future research should investigate how behavioral researchers can be supported in making these choices toward usability, ease of access, and approachability of digital interventions. Our study underscores the need for authoring platforms that can facilitate the process of creating and deploying digital interventions to reach their full potential for interventionists and recipients alike. ",
  doi      = "10.2196/34300",
  url      = "https://www.jmir.org/2022/3/e34300",
  pmid     = "35353045"
}
@Article{info:doi/10.2196/31209,
  author   = "Kuleindiren, Narayan and Rifkin-Zybutz, Paul Raphael and Johal, Monika and Selim, Hamzah and Palmon, Itai and Lin, Aaron and Yu, Yizhou and Alim-Marvasti, Ali and Mahmud, Mohammad",
  title    = "Optimizing Existing Mental Health Screening Methods in a Dementia Screening and Risk Factor App: Observational Machine Learning Study",
  journal  = "JMIR Form Res",
  year     = "2022",
  month    = mar,
  day      = "22",
  volume   = "6",
  number   = "3",
  pages    = "e31209",
  keywords = "depression, anxiety, screening, research method, questionnaire, precision, dementia, cognition, risk factors, machine learning, prediction",
  abstract = "Background: Mindstep is an app that aims to improve dementia screening by assessing cognition and risk factors. It considers important clinical risk factors, including prodromal symptoms, mental health disorders, and differential diagnoses of dementia. The 9-item Patient Health Questionnaire for depression (PHQ-9) and the 7-item Generalized Anxiety Disorder Scale (GAD-7) are widely validated and commonly used scales used in screening for depression and anxiety disorders, respectively. Shortened versions of both (PHQ-2/GAD-2) have been produced. Objective: We sought to develop a method that maintained the brevity of these shorter questionnaires while maintaining the better precision of the original questionnaires. Methods: Single questions were designed to encompass symptoms covered in the original questionnaires. Answers to these questions were combined with PHQ-2/GAD-2, and anonymized risk factors were collected by Mindset4Dementia from 2235 users. 
Machine learning models were trained to use these single questions in combination with data already collected by the app: age, response to a joke, and reporting of functional impairment to predict binary and continuous outcomes as measured using PHQ-9/GAD-7. Our model was developed with a training data set by using 10-fold cross-validation and a holdout testing data set and compared to results from using the shorter questionnaires (PHQ-2/GAD-2) alone to benchmark performance. Results: We were able to achieve superior performance in predicting PHQ-9/GAD-7 screening cutoffs compared to PHQ-2 (difference in area under the curve 0.04, 95\% CI 0.00-0.08, P=.02) but not GAD-2 (difference in area under the curve 0.00, 95\% CI --0.02 to 0.03, P=.42). Regression models were able to accurately predict total questionnaire scores in PHQ-9 (R2=0.655, mean absolute error=2.267) and GAD-7 (R2=0.837, mean absolute error=1.780). Conclusions: We app-adapted PHQ-4 by adding brief summary questions about factors normally covered in the longer questionnaires. We additionally trained machine learning models that used the wide range of additional information already collected in Mindstep to make a short app-based screening tool for affective disorders, which appears to have superior or equivalent performance to well-established methods. ",
  doi      = "10.2196/31209",
  url      = "https://formative.jmir.org/2022/3/e31209",
  pmid     = "35315786"
}
@Article{info:doi/10.2196/23589,
  author   = "Behrens, Anders and Berglund, Sanmartin Johan and Anderberg, Peter",
  title    = "{CoGNIT} Automated Tablet Computer Cognitive Testing in Patients With Mild Cognitive Impairment: Feasibility Study",
  journal  = "JMIR Form Res",
  year     = "2022",
  month    = mar,
  day      = "11",
  volume   = "6",
  number   = "3",
  pages    = "e23589",
  keywords = "internet, cognitive testing, software, testing, impairment, cognition, feasibility, diagnosis, app, assessment, cognitive impairment",
  abstract = "Background: Early diagnosis of cognitive disorders is becoming increasingly important. Limited resources for specialist assessment and an increasing demographical challenge warrants the need for efficient methods of evaluation. In response, CoGNIT, a tablet app for automatic, standardized, and efficient assessment of cognitive function, was developed. Included tests span the cognitive domains regarded as important for assessment in a general memory clinic (memory, language, psychomotor speed, executive function, attention, visuospatial ability, manual dexterity, and symptoms of depression). Objective: The aim of this study was to assess the feasibility of automatic cognitive testing with CoGNIT in older patients with symptoms of mild cognitive impairment (MCI). Methods: Patients older than 55 years with symptoms of MCI (n=36) were recruited at the research clinic at the Blekinge Institute of Technology (BTH), Karlskrona, Sweden. A research nurse administered the Mini-Mental State Exam (MMSE) and the CoGNIT app on a tablet computer. Technical and testing issues were documented. Results: The test battery was completed by all 36 patients. One test, the four-finger--tapping test, was performed incorrectly by 42\% of the patients. 
Issues regarding clarity of instructions were found in 2 tests (block design test and the one finger-tapping test). Minor software bugs were identified. Conclusions: The overall feasibility of automatic cognitive testing with the CoGNIT app in patients with symptoms of MCI was good. The study highlighted tests that did not function optimally. The four-finger--tapping test will be discarded, and minor improvements to the software will be added before further studies and deployment in the clinic. ",
  doi      = "10.2196/23589",
  url      = "https://formative.jmir.org/2022/3/e23589",
  pmid     = "35275064"
}
@Article{info:doi/10.2196/31106,
  author   = "Cheah, Wen-Ting and Hwang, Jwu-Jia and Hong, Sheng-Yi and Fu, Li-Chen and Chang, Yu-Ling and Chen, Ta-Fu and Chen, I-An and Chou, Chun-Chen",
  title    = "A Digital Screening System for {Alzheimer} Disease Based on a Neuropsychological Test and a Convolutional Neural Network: System Development and Validation",
  journal  = "JMIR Med Inform",
  year     = "2022",
  month    = mar,
  day      = "9",
  volume   = "10",
  number   = "3",
  pages    = "e31106",
  keywords = "Alzheimer disease, mild cognitive impairment, screening system, convolutional neural network, Rey-Osterrieth Complex Figure",
  abstract = "Background: Alzheimer disease (AD) and other types of dementia are now considered one of the world's most pressing health problems for aging people worldwide. It was the seventh-leading cause of death, globally, in 2019. With a growing number of patients with dementia and increasing costs for treatment and care, early detection of the disease at the stage of mild cognitive impairment (MCI) will prevent the rapid progression of dementia. In addition to reducing the physical and psychological stress of patients' caregivers in the long term, it will also improve the everyday quality of life of patients. 
Objective: The aim of this study was to design a digital screening system to discriminate between patients with MCI and AD and healthy controls (HCs), based on the Rey-Osterrieth Complex Figure (ROCF) neuropsychological test. Methods: The study took place at National Taiwan University between 2018 and 2019. In order to develop the system, pretraining was performed using, and features were extracted from, an open sketch data set using a data-driven deep learning approach through a convolutional neural network. Later, the learned features were transferred to our collected data set to further train the classifier. The first data set was collected using pen and paper for the traditional method. The second data set used a tablet and smart pen for data collection. The system's performance was then evaluated using the data sets. Results: The performance of the designed system when using the data set that was collected using the traditional pen and paper method resulted in a mean area under the receiver operating characteristic curve (AUROC) of 0.913 (SD 0.004) when distinguishing between patients with MCI and HCs. On the other hand, when discriminating between patients with AD and HCs, the mean AUROC was 0.950 (SD 0.003) when using the data set that was collected using the digitalized method. Conclusions: The automatic ROCF test scoring system that we designed showed satisfying results for differentiating between patients with AD and MCI and HCs. Comparatively, our proposed network architecture provided better performance than our previous work, which did not include data augmentation and dropout techniques. In addition, it also performed better than other existing network architectures, such as AlexNet and Sketch-a-Net, with transfer learning techniques. The proposed system can be incorporated with other tests to assist clinicians in the early diagnosis of AD and to reduce the physical and mental burden on patients' family and friends. ",
  doi      = "10.2196/31106",
  url      = "https://medinform.jmir.org/2022/3/e31106",
  pmid     = "35262497"
}
@Article{info:doi/10.2196/28333,
  author   = "Ferrario, Andrea and Luo, Minxia and Polsinelli, J. Angelina and Moseley, A. Suzanne and Mehl, R. Matthias and Yordanova, Kristina and Martin, Mike and Demiray, Burcu",
  title    = "Predicting Working Memory in Healthy Older Adults Using Real-Life Language and Social Context Information: A Machine Learning Approach",
  journal  = "JMIR Aging",
  year     = "2022",
  month    = mar,
  day      = "8",
  volume   = "5",
  number   = "1",
  pages    = "e28333",
  keywords = "cognitive aging, language complexity, social context, machine learning, natural language processing, Electronically Activated Recorder (EAR), behavioral indicators",
  abstract = "Background: Language use and social interactions have demonstrated a close relationship with cognitive measures. It is important to improve the understanding of language use and behavioral indicators from social context to study the early prediction of cognitive decline among healthy populations of older adults. Objective: This study aimed at predicting an important cognitive ability, working memory, of 98 healthy older adults participating in a 4-day-long naturalistic observation study. We used linguistic measures, part-of-speech (POS) tags, and social context information extracted from 7450 real-life audio recordings of their everyday conversations. Methods: The methods in this study comprise (1) the generation of linguistic measures, representing idea density, vocabulary richness, and grammatical complexity, as well as POS tags with natural language processing (NLP) from the transcripts of real-life conversations and (2) the training of machine learning models to predict working memory using linguistic measures, POS tags, and social context information. 
We measured working memory using (1) the Keep Track test, (2) the Consonant Updating test, and (3) a composite score based on the Keep Track and Consonant Updating tests. We trained machine learning models using random forest, extreme gradient boosting, and light gradient boosting machine algorithms, implementing repeated cross-validation with different numbers of folds and repeats and recursive feature elimination to avoid overfitting. Results: For all three prediction routines, models comprising linguistic measures, POS tags, and social context information improved the baseline performance on the validation folds. The best model for the Keep Track prediction routine comprised linguistic measures, POS tags, and social context variables. The best models for prediction of the Consonant Updating score and the composite working memory score comprised POS tags only. Conclusions: The results suggest that machine learning and NLP may support the prediction of working memory using, in particular, linguistic measures and social context information extracted from the everyday conversations of healthy older adults. Our findings may support the design of an early warning system to be used in longitudinal studies that collects cognitive ability scores and records real-life conversations unobtrusively. This system may support the timely detection of early cognitive decline. In particular, the use of a privacy-sensitive passive monitoring technology would allow for the design of a program of interventions to enable strategies and treatments to decrease or avoid early cognitive decline. ",
  doi      = "10.2196/28333",
  url      = "https://aging.jmir.org/2022/1/e28333",
  pmid     = "35258457"
}
@Article{info:doi/10.2196/34237, author="Myers, Rae Jennifer and Glenn, M. Jordan and Madero, N. Erica and Anderson, John and Mak-McCully, Rachel and Gray, Michelle and Gills, L. Joshua and Harrison, E. 
John",
  title    = "Asynchronous Remote Assessment for Cognitive Impairment: Reliability Verification of the {Neurotrack Cognitive Battery}",
  journal  = "JMIR Form Res",
  year     = "2022",
  month    = feb,
  day      = "18",
  volume   = "6",
  number   = "2",
  pages    = "e34237",
  keywords = "cognition, screening, remote testing, psychometric, challenge, validation, assessment, impairment, access, reliability, stability, testing, utility",
  abstract = "Background: As evidenced by the further reduction in access to testing during the COVID-19 pandemic, there is an urgent, growing need for remote cognitive assessment for individuals with cognitive impairment. The Neurotrack Cognitive Battery (NCB), our response to this need, was evaluated for its temporal reliability and stability as part of ongoing validation testing. Objective: The aim of this study is to assess the temporal reliability of the NCB tests (5 total) across a 1-week period and to determine the temporal stability of these measures across 3 consecutive administrations in a single day. Methods: For test-retest reliability, a range of 29-66 cognitively healthy participants (ages 18-68 years) completed each cognitive assessment twice, 1 week apart. In a separate study, temporal stability was assessed using data collected from 31 different cognitively healthy participants at 3 consecutive timepoints in a single day. Results: Correlations for the assessments were between 0.72 and 0.83, exceeding the standard acceptable threshold of 0.70 for temporal reliability. Intraclass correlations ranged from 0.60 to 0.84, indicating moderate to good temporal stability. Conclusions: These results highlight the NCB as a brief, easy-to-administer, and reliable assessment for remote cognitive testing. Additional validation research is underway to determine the full magnitude of the clinical utility of the NCB. ",
  doi      = "10.2196/34237",
  url      = "https://formative.jmir.org/2022/2/e34237",
  pmid     = "35179511"
}
@Article{info:doi/10.2196/33585,
  author   = "MacDonald, J. James and Baxter-King, Ryan and Vavreck, Lynn and Naeim, Arash and Wenger, Neil and Sepucha, Karen and Stanton, L. Annette",
  title    = "Depressive Symptoms and Anxiety During the {COVID-19} Pandemic: Large, Longitudinal, Cross-sectional Survey",
  journal  = "JMIR Ment Health",
  year     = "2022",
  month    = feb,
  day      = "10",
  volume   = "9",
  number   = "2",
  pages    = "e33585",
  keywords = "COVID-19, depression, anxiety, pandemic, mental health, public health, psychological variables, younger adults, symptom monitoring, health intervention",
  abstract = "Background: The COVID-19 pandemic has influenced the mental health of millions across the globe. Understanding factors associated with depressive symptoms and anxiety across 12 months of the pandemic can help identify groups at higher risk and psychological processes that can be targeted to mitigate the long-term mental health impact of the pandemic. Objective: This study aims to determine sociodemographic features, COVID-19-specific factors, and general psychological variables associated with depressive symptoms and anxiety over 12 months of the pandemic. Methods: Nationwide, cross-sectional electronic surveys were implemented in May (n=14,636), July (n=14,936), October (n=14,946), and December (n=15,265) 2020 and March/April 2021 (n=14,557) in the United States. Survey results were weighted to be representative of the US population. The samples were drawn from a market research platform, with a 69\% cooperation rate. Surveys assessed depressive symptoms in the past 2 weeks and anxiety in the past week, as well as sociodemographic features; COVID-19 restriction stress, worry, perceived risk, coping strategies, and exposure; intolerance of uncertainty; and loneliness. 
Results: Across 12 months, an average of 24\% of respondents reported moderate-to-severe depressive symptoms and 32\% reported moderate-to-severe anxiety. Of the sociodemographic variables, age was most consistently associated with depressive symptoms and anxiety, with younger adults more likely to report higher levels of those outcomes. Intolerance of uncertainty and loneliness were consistently and strongly associated with the outcomes. Of the COVID-19-specific variables, stress from COVID-19 restrictions, worry about COVID-19, coping behaviors, and having COVID-19 were associated with a higher likelihood of depressive symptoms and anxiety. Conclusions: Depressive symptoms and anxiety were high in younger adults, adults who reported restriction stress or worry about COVID-19 or who had had COVID-19, and those with intolerance of uncertainty and loneliness. Symptom monitoring as well as early and accessible intervention are recommended. ", doi="10.2196/33585", url="https://mental.jmir.org/2022/2/e33585", url="http://www.ncbi.nlm.nih.gov/pubmed/35142619" } @Article{info:doi/10.2196/32368, author="Vlake, H. Johan and van Bommel, Jasper and Wils, Evert-Jan and Bienvenu, Joe and Hellemons, E. Merel and Korevaar, IM Tim and Schut, FC Anna and Labout, AM Joost and Schreuder, LH Lois and van Bavel, P. Marten and Gommers, Diederik and van Genderen, E. 
Michel", title="Intensive Care Unit--Specific Virtual Reality for Critically Ill Patients With COVID-19: Multicenter Randomized Controlled Trial", journal="J Med Internet Res", year="2022", month="Jan", day="31", volume="24", number="1", pages="e32368", keywords="SARS-CoV-2", keywords="intensive care", keywords="post-intensive care syndrome", keywords="virtual reality", keywords="quality of life", keywords="satisfaction", keywords="COVID-19", abstract="Background: Although psychological sequelae after intensive care unit (ICU) treatment are considered quite intrusive, robustly effective interventions to treat or prevent these long-term sequelae are lacking. Recently, it was demonstrated that ICU-specific virtual reality (ICU-VR) is a feasible and acceptable intervention with potential mental health benefits. However, its effect on mental health and ICU aftercare in COVID-19 ICU survivors is unknown. Objective: This study aimed to explore the effects of ICU-VR on mental health and on patients' perceived quality of, satisfaction with, and rating of ICU aftercare among COVID-19 ICU survivors. Methods: This was a multicenter randomized controlled trial. Patients were randomized to either the ICU-VR (intervention) or the control group. All patients were invited to a COVID-19 post-ICU follow-up clinic 3 months after hospital discharge, during which patients in the intervention group received ICU-VR. One month and 3 months later (4 and 6 months after hospital discharge), mental health, quality of life, perceived quality, satisfaction with, and rating of ICU aftercare were scored using questionnaires. Results: Eighty-nine patients (median age 58 years; 63 males, 70\%) were included. The prevalence and severity of psychological distress were limited throughout follow-up, and no differences in psychological distress or quality of life were observed between the groups. 
ICU-VR improved satisfaction with (mean score 8.7, SD 1.6 vs 7.6, SD 1.6 [ICU-VR vs control]; t64=--2.82, P=.006) and overall rating of ICU aftercare (mean overall rating of aftercare 8.9, SD 0.9 vs 7.8, SD 1.7 [ICU-VR vs control]; t64=--3.25; P=.002) compared to controls. ICU-VR added to the quality of ICU aftercare according to 81\% of the patients, and all patients would recommend ICU-VR to other ICU survivors. Conclusions: ICU-VR is a feasible and acceptable innovative method to improve satisfaction with and rating of ICU aftercare and adds to its perceived quality. We observed a low prevalence of psychological distress after ICU treatment for COVID-19, and ICU-VR did not improve psychological recovery or quality of life. Future research is needed to confirm our results in other critical illness survivors to potentially facilitate ICU-VR's widespread availability and application during follow-up. Trial Registration: Netherlands Trial Register NL8835; https://www.trialregister.nl/trial/8835 International Registered Report Identifier (IRRID): RR2-10.1186/s13063-021-05271-z ", doi="10.2196/32368", url="https://www.jmir.org/2022/1/e32368", url="http://www.ncbi.nlm.nih.gov/pubmed/34978530" } @Article{info:doi/10.2196/32832, author="Hennemann, Severin and Kuhn, Sebastian and Witth{\"o}ft, Michael and Jungmann, M. Stefanie", title="Diagnostic Performance of an App-Based Symptom Checker in Mental Disorders: Comparative Study in Psychotherapy Outpatients", journal="JMIR Ment Health", year="2022", month="Jan", day="31", volume="9", number="1", pages="e32832", keywords="mHealth", keywords="symptom checker", keywords="diagnostics", keywords="mental disorders", keywords="psychotherapy", keywords="mobile phone", abstract="Background: Digital technologies have become a common starting point for health-related information-seeking. 
Web- or app-based symptom checkers aim to provide rapid and accurate condition suggestions and triage advice but have not yet been investigated for mental disorders in routine health care settings. Objective: This study aims to test the diagnostic performance of a widely available symptom checker in the context of formal diagnosis of mental disorders when compared with therapists' diagnoses based on structured clinical interviews. Methods: Adult patients from an outpatient psychotherapy clinic used the app-based symptom checker Ada--check your health (ADA; Ada Health GmbH) at intake. Accuracy was assessed as the agreement of the first and 1 of the first 5 condition suggestions of ADA with at least one of the interview-based therapist diagnoses. In addition, sensitivity, specificity, and interrater reliabilities (Gwet first-order agreement coefficient [AC1]) were calculated for the 3 most prevalent disorder categories. Self-reported usability (assessed using the System Usability Scale) and acceptance of ADA (assessed using an adapted feedback questionnaire) were evaluated. Results: A total of 49 patients (30/49, 61\% women; mean age 33.41, SD 12.79 years) were included in this study. Across all patients, the interview-based diagnoses matched ADA's first condition suggestion in 51\% (25/49; 95\% CI 37.5-64.4) of cases and 1 of the first 5 condition suggestions in 69\% (34/49; 95\% CI 55.4-80.6) of cases. Within the main disorder categories, the accuracy of ADA's first condition suggestion was 0.82 for somatoform and associated disorders, 0.65 for affective disorders, and 0.53 for anxiety disorders. Interrater reliabilities ranged from low (AC1=0.15 for anxiety disorders) to good (AC1=0.76 for somatoform and associated disorders). The usability of ADA was rated as high in the System Usability Scale (mean 81.51, SD 11.82, score range 0-100). Approximately 71\% (35/49) of participants would have preferred a face-to-face over an app-based diagnostic. 
Conclusions: Overall, our findings suggest that a widely available symptom checker used in the formal diagnosis of mental disorders could provide clinicians with a list of condition suggestions with moderate-to-good accuracy. However, diagnostic performance was heterogeneous between disorder categories and included low interrater reliability. Although symptom checkers have some potential to complement the diagnostic process as a screening tool, the diagnostic performance should be tested in larger samples and in comparison with further diagnostic instruments. ", doi="10.2196/32832", url="https://mental.jmir.org/2022/1/e32832", url="http://www.ncbi.nlm.nih.gov/pubmed/35099395" } @Article{info:doi/10.2196/34475, author="Kalafatis, Chris and Modarres, Hadi Mohammad and Apostolou, Panos and Tabet, Naji and Khaligh-Razavi, Seyed-Mahdi", title="The Use of a Computerized Cognitive Assessment to Improve the Efficiency of Primary Care Referrals to Memory Services: Protocol for the Accelerating Dementia Pathway Technologies (ADePT) Study", journal="JMIR Res Protoc", year="2022", month="Jan", day="27", volume="11", number="1", pages="e34475", keywords="primary health care", keywords="general practice", keywords="dementia", keywords="cognitive assessment", keywords="artificial intelligence", keywords="early diagnosis", keywords="cognition", keywords="assessment", keywords="efficiency", keywords="diagnosis", keywords="COVID-19", keywords="memory", keywords="mental health", keywords="impairment", keywords="screening", keywords="detection", abstract="Background: Existing primary care cognitive assessment tools are crude or time-consuming screening instruments which can only detect cognitive impairment when it is well established. Due to the COVID-19 pandemic, memory services have adapted to the new environment by moving to remote patient assessments to continue meeting service user demand. 
However, the remote use of cognitive assessments has been variable while there has been scant evaluation of the outcome of such a change in clinical practice. Emerging research in remote memory clinics has highlighted computerized cognitive tests, such as the Integrated Cognitive Assessment (ICA), as prominent candidates for adoption in clinical practice both during the pandemic and for post-COVID-19 implementation as part of health care innovation. Objective: The aim of the Accelerating Dementia Pathway Technologies (ADePT) study is to develop a real-world evidence basis to support the adoption of ICA as an inexpensive screening tool for the detection of cognitive impairment to improve the efficiency of the dementia care pathway. Methods: Patients who have been referred to a memory clinic by a general practitioner (GP) are recruited. Participants complete the ICA either at home or in the clinic along with medical history and usability questionnaires. The GP referral and ICA outcome are compared with the specialist diagnosis obtained at the memory clinic. The clinical outcomes as well as National Health Service reference costing data will be used to assess the potential health and economic benefits of the use of the ICA in the dementia diagnosis pathway. Results: The ADePT study was funded in January 2020 by Innovate UK (Project Number 105837). As of September 2021, 86 participants have been recruited in the study, with 23 participants also completing a retest visit. Initially, the study was designed for in-person visits at the memory clinic; however, in light of the COVID-19 pandemic, the study was amended to allow remote as well as face-to-face visits. The study was also expanded from a single site to 4 sites in the United Kingdom. We expect results to be published by the second quarter of 2022. 
Conclusions: The ADePT study aims to improve the efficiency of the dementia care pathway at its very beginning and supports systems integration at the intersection between primary and secondary care. The introduction of a standardized, self-administered, digital assessment tool for the timely detection of neurodegeneration as part of a decision support system that can signpost accordingly can reduce unnecessary referrals, service backlog, and assessment variability. Trial Registration: ISRCTN 16596456; https://www.isrctn.com/ISRCTN16596456 International Registered Report Identifier (IRRID): DERR1-10.2196/34475 ", doi="10.2196/34475", url="https://www.researchprotocols.org/2022/1/e34475", url="http://www.ncbi.nlm.nih.gov/pubmed/34932495" } @Article{info:doi/10.2196/24699, author="Birnbaum, L. Michael and Abrami, Avner and Heisig, Stephen and Ali, Asra and Arenare, Elizabeth and Agurto, Carla and Lu, Nathaniel and Kane, M. John and Cecchi, Guillermo", title="Acoustic and Facial Features From Clinical Interviews for Machine Learning--Based Psychiatric Diagnosis: Algorithm Development", journal="JMIR Ment Health", year="2022", month="Jan", day="24", volume="9", number="1", pages="e24699", keywords="audiovisual patterns", keywords="speech analysis", keywords="facial analysis", keywords="psychiatry", keywords="schizophrenia spectrum disorders", keywords="bipolar disorder", keywords="symptom prediction", keywords="diagnostic prediction", keywords="machine learning", keywords="audiovisual", keywords="speech", keywords="schizophrenia", keywords="spectrum disorders", abstract="Background: In contrast to all other areas of medicine, psychiatry is still nearly entirely reliant on subjective assessments such as patient self-report and clinical observation. The lack of objective information on which to base clinical decisions can contribute to reduced quality of care. 
Behavioral health clinicians need objective and reliable patient data to support effective targeted interventions. Objective: We aimed to investigate whether reliable inferences---psychiatric signs, symptoms, and diagnoses---can be extracted from audiovisual patterns in recorded evaluation interviews of participants with schizophrenia spectrum disorders and bipolar disorder. Methods: We obtained audiovisual data from 89 participants (mean age 25.3 years; male: 48/89, 53.9\%; female: 41/89, 46.1\%): individuals with schizophrenia spectrum disorders (n=41), individuals with bipolar disorder (n=21), and healthy volunteers (n=27). We developed machine learning models based on acoustic and facial movement features extracted from participant interviews to predict diagnoses and detect clinician-coded neuropsychiatric symptoms, and we assessed model performance using area under the receiver operating characteristic curve (AUROC) in 5-fold cross-validation. Results: The model successfully differentiated between schizophrenia spectrum disorders and bipolar disorder (AUROC 0.73) when aggregating face and voice features. Facial action units including cheek-raising muscle (AUROC 0.64) and chin-raising muscle (AUROC 0.74) provided the strongest signal for men. Vocal features, such as energy in the frequency band 1 to 4 kHz (AUROC 0.80) and spectral harmonicity (AUROC 0.78), provided the strongest signal for women. Lip corner--pulling muscle signal discriminated between diagnoses for both men (AUROC 0.61) and women (AUROC 0.62). Several psychiatric signs and symptoms were successfully inferred: blunted affect (AUROC 0.81), avolition (AUROC 0.72), lack of vocal inflection (AUROC 0.71), asociality (AUROC 0.63), and worthlessness (AUROC 0.61). 
Conclusions: This study represents advancement in efforts to capitalize on digital data to improve diagnostic assessment and supports the development of a new generation of innovative clinical tools by employing acoustic and facial data analysis. ", doi="10.2196/24699", url="https://mental.jmir.org/2022/1/e24699", url="http://www.ncbi.nlm.nih.gov/pubmed/35072648" } @Article{info:doi/10.2196/34333, author="Parra, Federico and Benezeth, Yannick and Yang, Fan", title="Automatic Assessment of Emotion Dysregulation in American, French, and Tunisian Adults and New Developments in Deep Multimodal Fusion: Cross-sectional Study", journal="JMIR Ment Health", year="2022", month="Jan", day="24", volume="9", number="1", pages="e34333", keywords="emotion dysregulation", keywords="deep multimodal fusion", keywords="small data", keywords="psychometrics", abstract="Background: Emotion dysregulation is a key dimension of adult psychological functioning. There is an interest in developing a computer-based, multimodal, and automatic measure. Objective: We wanted to train a deep multimodal fusion model to estimate emotion dysregulation in adults based on their responses to the Multimodal Developmental Profile, a computer-based psychometric test, using only a small training sample and without transfer learning. Methods: Two hundred and forty-eight participants from 3 different countries took the Multimodal Developmental Profile test, which exposed them to 14 picture and music stimuli and asked them to express their feelings about them, while the software extracted the following features from the video and audio signals: facial expressions, linguistic and paralinguistic characteristics of speech, head movements, gaze direction, and heart rate variability derivatives. Participants also responded to the brief version of the Difficulties in Emotional Regulation Scale. 
We separated and averaged the feature signals that corresponded to the responses to each stimulus, building a structured data set. We transformed each person's per-stimulus structured data into a multimodal codex, a grayscale image created by projecting each feature's normalized intensity value onto a cartesian space, deriving each pixel's position by applying the Uniform Manifold Approximation and Projection method. The codex sequence was then fed to 2 network types. First, 13 convolutional neural networks dealt with the spatial aspect of the problem, estimating emotion dysregulation by analyzing each of the codified responses. These convolutional estimations were then fed to a transformer network that decoded the temporal aspect of the problem, estimating emotional dysregulation based on the succession of responses. We introduce a Feature Map Average Pooling layer, which computes the mean of the convolved feature maps produced by our convolution layers, dramatically reducing the number of learnable weights and increasing regularization through an ensembling effect. We implemented 8-fold cross-validation to provide a good enough estimation of the generalization ability to unseen samples. Most of the experiments mentioned in this paper are easily replicable using the associated Google Colab system. Results: We found an average Pearson correlation (r) of 0.55 (with an average P value of <.001) between ground truth emotion dysregulation and our system's estimation of emotion dysregulation. An average mean absolute error of 0.16 and a mean concordance correlation coefficient of 0.54 were also found. Conclusions: In psychometry, our results represent excellent evidence of convergence validity, suggesting that the Multimodal Developmental Profile could be used in conjunction with this methodology to provide a valid measure of emotion dysregulation in adults. Future studies should replicate our findings using a hold-out test sample. 
Our methodology could be implemented more generally to train deep neural networks where only small training samples are available. ", doi="10.2196/34333", url="https://mental.jmir.org/2022/1/e34333", url="http://www.ncbi.nlm.nih.gov/pubmed/35072643" } @Article{info:doi/10.2196/28647, author="Dominiak, Monika and Kaczmarek-Majer, Katarzyna and Antosik-W{\'o}jci{\'n}ska, Z. Anna and Opara, R. Karol and Olwert, Anna and Radziszewska, Weronika and Hryniewicz, Olgierd and {\'S}wi{\k{e}}cicki, {\L}ukasz and Wojnar, Marcin and Mierzejewski, Pawe{\l}", title="Behavioral and Self-reported Data Collected From Smartphones for the Assessment of Depressive and Manic Symptoms in Patients With Bipolar Disorder: Prospective Observational Study", journal="J Med Internet Res", year="2022", month="Jan", day="19", volume="24", number="1", pages="e28647", keywords="bipolar disorder", keywords="generalized linear model", keywords="mixed-effects regression", keywords="classification", keywords="manic episodes", keywords="depressive episodes", keywords="smartphone", keywords="behavioral markers", keywords="mHealth", keywords="remote monitoring", abstract="Background: Smartphones allow for real-time monitoring of patients' behavioral activities in a naturalistic setting. These data are suggested as markers for the mental state of patients with bipolar disorder (BD). Objective: We assessed the relations between data collected from smartphones and the clinically rated depressive and manic symptoms together with the corresponding affective states in patients with BD. Methods: BDmon, a dedicated mobile app, was developed and installed on patients' smartphones to automatically collect the statistics about their phone calls and text messages as well as their self-assessments of sleep and mood. The final sample for the numerical analyses consisted of 51 eligible patients who participated in at least two psychiatric assessments and used the BDmon app (mean participation time, 208 [SD 132] days). 
In total, 196 psychiatric assessments were performed using the Hamilton Depression Rating Scale and the Young Mania Rating Scale. Generalized linear mixed-effects models were applied to quantify the strength of the relation between the daily statistics on the behavioral data collected automatically from smartphones and the affective symptoms and mood states in patients with BD. Results: Objective behavioral data collected from smartphones were found to be related with the BD states as follows: (1) depressed patients tended to make phone calls less frequently than euthymic patients ($\beta$=--.064, P=.01); (2) the number of incoming answered calls during depression was lower than that during euthymia ($\beta$=--.15, P=.01) and, concurrently, missed incoming calls were more frequent and increased as depressive symptoms intensified ($\beta$=4.431, P<.001; $\beta$=4.861, P<.001, respectively); (3) the fraction of outgoing calls was higher in manic states ($\beta$=2.73, P=.03); (4) the fraction of missed calls was higher in manic/mixed states as compared to that in the euthymic state ($\beta$=3.53, P=.01) and positively correlated to the severity of symptoms ($\beta$=2.991, P=.02); (5) the variability of the duration of the outgoing calls was higher in manic/mixed states ($\beta$=.0012, P=.045) and positively correlated to the severity of symptoms ($\beta$=.0017, P=.02); and (6) the number and length of the sent text messages was higher in manic/mixed states as compared to that in the euthymic state ($\beta$=.031, P=.01; $\beta$=.015, P=.01; respectively) and positively correlated to the severity of manic symptoms ($\beta$=.116, P<.001; $\beta$=.022, P<.001; respectively). We also observed that self-assessment of mood was lower in depressive ($\beta$=--1.452, P<.001) and higher in manic states ($\beta$=.509, P<.001). 
Conclusions: Smartphone-based behavioral parameters are valid markers for assessing the severity of affective symptoms and discriminating between mood states in patients with BD. This technology opens a way toward early detection of worsening of the mental state and thereby increases the patient's chance of improving in the course of the illness. ", doi="10.2196/28647", url="https://www.jmir.org/2022/1/e28647", url="http://www.ncbi.nlm.nih.gov/pubmed/34874015" } @Article{info:doi/10.2196/47487, author="Cubillos, Claudio and Rienzo, Antonio", title="Digital Cognitive Assessment Tests for Older Adults: Systematic Literature Review", journal="JMIR Ment Health", year="2023", month="Dec", day="8", volume="10", pages="e47487", keywords="cognitive digital test", keywords="systematic review", keywords="cognitive screening", keywords="digital interventions", keywords="older adults", abstract="Background: The global health pandemic has affected the increasing older adult population, especially those with mental illnesses. It is necessary to prevent cases of cognitive impairment in adults early on, and this requires the support of information and communication technologies for evaluating and training cognitive functions. This can be achieved through computer applications designed for cognitive assessment. Objective: In this review, we aimed to assess the state of the art of the current platforms and digital test applications for cognitive evaluation, with a focus on older adults. Methods: A systematic literature search was conducted on 3 databases (Web of Science, PubMed, and Scopus) to retrieve recent articles on the applications of digital tests for cognitive assessment and analyze them based on the methodology used. Four research questions were considered. Through the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) methodology, following the application of inclusion and exclusion criteria, a total of 20 articles were finally reviewed. 
Results: Some gaps and trends were identified regarding the types of digital applications and technologies used, the evaluated effects on cognitive domains, and the psychometric parameters and personal characteristics considered for validation. Conclusions: Computerized tests (similar to paper-and-pencil tests) and test batteries (on computers, tablets, or web platforms) were the predominant types of assessments. Initial studies with simulators, virtual environments, and daily-life activity games were also conducted. Diverse validation methods and psychometric properties were observed; however, there was a lack of evaluations that involved specific populations with diverse education levels, cultures, and degrees of technology acceptance. In addition, these evaluations should consider emotional and usability aspects. ", doi="10.2196/47487", url="https://mental.jmir.org/2023/1/e47487", url="http://www.ncbi.nlm.nih.gov/pubmed/38064247" } @Article{info:doi/10.2196/49147, author="Gu, Dongmei and Lv, Xiaozhen and Shi, Chuan and Zhang, Tianhong and Liu, Sha and Fan, Zili and Tu, Lihui and Zhang, Ming and Zhang, Nan and Chen, Liming and Wang, Zhijiang and Wang, Jing and Zhang, Ying and Li, Huizi and Wang, Luchun and Zhu, Jiahui and Zheng, Yaonan and Wang, Huali and Yu, Xin", title="A Stable and Scalable Digital Composite Neurocognitive Test for Early Dementia Screening Based on Machine Learning: Model Development and Validation Study", journal="J Med Internet Res", year="2023", month="Dec", day="1", volume="25", pages="e49147", keywords="mild cognitive impairment", keywords="digital cognitive assessment", keywords="machine learning", keywords="neurocognitive test", keywords="cognitive screening", keywords="dementia", abstract="Background: Dementia has become a major public health concern due to its heavy disease burden. Mild cognitive impairment (MCI) is a transitional stage between healthy aging and dementia. 
Early identification of MCI is an essential step in dementia prevention. Objective: Based on machine learning (ML) methods, this study aimed to develop and validate a stable and scalable panel of cognitive tests for the early detection of MCI and dementia based on the Chinese Neuropsychological Consensus Battery (CNCB) in the Chinese Neuropsychological Normative Project (CN-NORM) cohort. Methods: CN-NORM was a nationwide, multicenter study conducted in China with 871 participants, including an MCI group (n=327, 37.5\%), a dementia group (n=186, 21.4\%), and a cognitively normal (CN) group (n=358, 41.1\%). We used the following 4 algorithms to select candidate variables: the F-score according to the SelectKBest method, the area under the curve (AUC) from logistic regression (LR), P values from the logit method, and backward stepwise elimination. Different models were constructed after considering the administration duration and complexity of combinations of various tests. Receiver operating characteristic curve and AUC metrics were used to evaluate the discriminative ability of the models via stratified sampling cross-validation and LR and support vector classification (SVC) algorithms. This model was further validated in the Alzheimer's Disease Neuroimaging Initiative phase 3 (ADNI-3) cohort (N=743), which included 416 (56\%) CN subjects, 237 (31.9\%) patients with MCI, and 90 (12.1\%) patients with dementia. Results: Except for social cognition, all other domains in the CNCB differed between the MCI and CN groups (P<.008). In feature selection results regarding discrimination between the MCI and CN groups, the Hopkins Verbal Learning Test-5 minutes Recall had the best performance, with the highest mean AUC of up to 0.80 (SD 0.02) and an F-score of up to 258.70. The scalability of model 5 (Hopkins Verbal Learning Test-5 minutes Recall and Trail Making Test-B) was the lowest. 
Model 5 achieved a higher level of discrimination than the Hong Kong Brief Cognitive test score in distinguishing between the MCI and CN groups (P<.05). Model 5 also provided the highest sensitivity of up to 0.82 (range 0.72-0.92) and 0.83 (range 0.75-0.91) according to LR and SVC, respectively. This model yielded a similar robust discriminative performance in the ADNI-3 cohort regarding differentiation between the MCI and CN groups, with a mean AUC of up to 0.81 (SD 0) according to both LR and SVC algorithms. Conclusions: We developed a stable and scalable composite neurocognitive test based on ML that could differentiate not only between patients with MCI and controls but also between patients with different stages of cognitive impairment. This composite neurocognitive test is a feasible and practical digital biomarker that can potentially be used in large-scale cognitive screening and intervention studies. ", doi="10.2196/49147", url="https://www.jmir.org/2023/1/e49147", url="http://www.ncbi.nlm.nih.gov/pubmed/38039074" } @Article{info:doi/10.2196/50193, author="He, Yupeng and Matsunaga, Masaaki and Li, Yuanying and Kishi, Taro and Tanihara, Shinichi and Iwata, Nakao and Tabuchi, Takahiro and Ota, Atsuhiko", title="Classifying Schizophrenia Cases by Artificial Neural Network Using Japanese Web-Based Survey Data: Case-Control Study", journal="JMIR Form Res", year="2023", month="Nov", day="15", volume="7", pages="e50193", keywords="artificial neural network", keywords="schizophrenia", keywords="prevalence", keywords="Japan", keywords="web-based survey", keywords="mental health", keywords="psychosis", keywords="machine learning", keywords="epidemiology", abstract="Background: In Japan, challenges were reported in accurately estimating the prevalence of schizophrenia among the general population. 
Retrieving previous studies, we investigated that patients with schizophrenia were more likely to experience poor subjective well-being and various physical, psychiatric, and social comorbidities. These factors might have great potential for precisely classifying schizophrenia cases in order to estimate the prevalence. Machine learning has shown a positive impact on many fields, including epidemiology, due to its high-precision modeling capability. It has been applied in research on mental disorders. However, few studies have applied machine learning technology to the precise classification of schizophrenia cases by variables of demographic and health-related backgrounds, especially using large-scale web-based surveys. Objective: The aim of the study is to construct an artificial neural network (ANN) model that can accurately classify schizophrenia cases from large-scale Japanese web-based survey data and to verify the generalizability of the model. Methods: Data were obtained from a large Japanese internet research pooled panel (Rakuten Insight, Inc) in 2021. A total of 223 individuals, aged 20-75 years, having schizophrenia, and 1776 healthy controls were included. Answers to the questions in a web-based survey were formatted as 1 response variable (self-report diagnosed with schizophrenia) and multiple feature variables (demographic, health-related backgrounds, physical comorbidities, psychiatric comorbidities, and social comorbidities). An ANN was applied to construct a model for classifying schizophrenia cases. Logistic regression (LR) was used as a reference. The performances of the models and algorithms were then compared. Results: The model trained by the ANN performed better than LR in terms of area under the receiver operating characteristic curve (0.86 vs 0.78), accuracy (0.93 vs 0.91), and specificity (0.96 vs 0.94), while the model trained by LR showed better sensitivity (0.63 vs 0.56). 
Comparing the performances of the ANN and LR, the ANN was better in terms of area under the receiver operating characteristic curve (bootstrapping: 0.847 vs 0.773 and cross-validation: 0.81 vs 0.72), while LR performed better in terms of accuracy (0.894 vs 0.856). Sleep medication use, age, household income, and employment type were the top 4 variables in terms of importance. Conclusions: This study constructed an ANN model to classify schizophrenia cases using web-based survey data. Our model showed a high internal validity. The findings are expected to provide evidence for estimating the prevalence of schizophrenia in the Japanese population and informing future epidemiological studies. ", doi="10.2196/50193", url="https://formative.jmir.org/2023/1/e50193", url="http://www.ncbi.nlm.nih.gov/pubmed/37966882" } @Article{info:doi/10.2196/48754, author="Abd-alrazaq, Alaa and AlSaad, Rawan and Harfouche, Manale and Aziz, Sarah and Ahmed, Arfan and Damseh, Rafat and Sheikh, Javaid", title="Wearable Artificial Intelligence for Detecting Anxiety: Systematic Review and Meta-Analysis", journal="J Med Internet Res", year="2023", month="Nov", day="8", volume="25", pages="e48754", keywords="anxiety", keywords="artificial intelligence", keywords="wearable devices", keywords="machine learning", keywords="systematic review", keywords="mobile phone", abstract="Background: Anxiety disorders rank among the most prevalent mental disorders worldwide. Anxiety symptoms are typically evaluated using self-assessment surveys or interview-based assessment methods conducted by clinicians, which can be subjective, time-consuming, and challenging to repeat. Therefore, there is an increasing demand for using technologies capable of providing objective and early detection of anxiety. Wearable artificial intelligence (AI), the combination of AI technology and wearable devices, has been widely used to detect and predict anxiety disorders automatically, objectively, and more efficiently. 
Objective: This systematic review and meta-analysis aims to assess the performance of wearable AI in detecting and predicting anxiety. Methods: Relevant studies were retrieved by searching 8 electronic databases and backward and forward reference list checking. In total, 2 reviewers independently carried out study selection, data extraction, and risk-of-bias assessment. The included studies were assessed for risk of bias using a modified version of the Quality Assessment of Diagnostic Accuracy Studies--Revised. Evidence was synthesized using a narrative (ie, text and tables) and statistical (ie, meta-analysis) approach as appropriate. Results: Of the 918 records identified, 21 (2.3\%) were included in this review. A meta-analysis of results from 81\% (17/21) of the studies revealed a pooled mean accuracy of 0.82 (95\% CI 0.71-0.89). Meta-analyses of results from 48\% (10/21) of the studies showed a pooled mean sensitivity of 0.79 (95\% CI 0.57-0.91) and a pooled mean specificity of 0.92 (95\% CI 0.68-0.98). Subgroup analyses demonstrated that the performance of wearable AI was not moderated by algorithms, aims of AI, wearable devices used, status of wearable devices, data types, data sources, reference standards, and validation methods. Conclusions: Although wearable AI has the potential to detect anxiety, it is not yet advanced enough for clinical use. Until further evidence shows an ideal performance of wearable AI, it should be used along with other clinical assessments. Wearable device companies need to develop devices that can promptly detect anxiety and identify specific time points during the day when anxiety levels are high. Further research is needed to differentiate types of anxiety, compare the performance of different wearable devices, and investigate the impact of the combination of wearable device data and neuroimaging data on the performance of wearable AI. 
Trial Registration: PROSPERO CRD42023387560; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=387560 ", doi="10.2196/48754", url="https://www.jmir.org/2023/1/e48754", url="http://www.ncbi.nlm.nih.gov/pubmed/37938883" } @Article{info:doi/10.2196/48143, author="Engineer, Margi and Kot, Sushant and Dixon, Emma", title="Investigating the Readability and Linguistic, Psychological, and Emotional Characteristics of Digital Dementia Information Written in the English Language: Multitrait-Multimethod Text Analysis", journal="JMIR Form Res", year="2023", month="Oct", day="25", volume="7", pages="e48143", keywords="natural language processing", keywords="consumer health information", keywords="readability", keywords="Alzheimer disease and related dementias", keywords="caregivers", abstract="Background: Past research in the Western context found that people with dementia search for digital dementia information in peer-reviewed medical research articles, dementia advocacy and medical organizations, and blogs written by other people with dementia. This past work also demonstrated that people with dementia do not perceive English digital dementia information as emotionally or cognitively accessible. Objective: In this study, we sought to investigate the readability; linguistic, psychological, and emotional characteristics; and target audiences of digital dementia information. We conducted a textual analysis of 3 different types of text-based digital dementia information written in English: 300 medical articles, 35 websites, and 50 blogs. Methods: We assessed the text's readability using the Flesch Reading Ease and Flesch-Kincaid Grade Level measurements, as well as tone, analytical thinking, clout, authenticity, and word frequencies using a natural language processing tool, Linguistic Inquiry and Word Count Generator. 
We also conducted a thematic analysis to categorize the target audiences for each information source and used these categorizations for further statistical analysis. Results: The median Flesch-Kincaid Grade Level readability score and Flesch Reading Ease score for all types of information (N=1139) were 12.1 and 38.6, respectively, revealing that the readability scores of all 3 information types were higher than the minimum requirement. We found that medical articles had significantly (P=.05) higher word count and analytical thinking scores as well as significantly lower clout, authenticity, and emotional tone scores than websites and blogs. Further, blogs had significantly (P=.48) higher word count and authenticity scores but lower analytical scores than websites. Using thematic analysis, we found that most of the blogs (156/227, 68.7\%) and web pages (399/612, 65.2\%) were targeted at people with dementia. Website information targeted at a general audience had significantly lower readability scores. In addition, website information targeted at people with dementia had higher word count and lower emotional tone ratings. The information on websites targeted at caregivers had significantly higher clout and lower authenticity scores. Conclusions: Our findings indicate that there is an abundance of digital dementia information written in English that is targeted at people with dementia, but this information is not readable by a general audience. This is problematic considering that people with <12 years of education are at a higher risk of developing dementia. Further, our findings demonstrate that digital dementia information written in English has a negative tone, which may be a contributing factor to the mental health crisis many people with dementia face after receiving a diagnosis. 
Therefore, we call for content creators to lower readability scores to make the information more accessible to a general audience and to focus their efforts on providing information in a way that does not perpetuate overly negative narratives of dementia. ", doi="10.2196/48143", url="https://formative.jmir.org/2023/1/e48143", url="http://www.ncbi.nlm.nih.gov/pubmed/37878351" } @Article{info:doi/10.2196/48444, author="Glavin, Darragh and Grua, Martino Eoin and Nakamura, Akemi Carina and Scazufca, Marcia and Ribeiro dos Santos, Edinilza and Wong, Y. Gloria H. and Hollingworth, William and Peters, J. Tim and Araya, Ricardo and Van de Ven, Pepijn", title="Patient Health Questionnaire-9 Item Pairing Predictiveness for Prescreening Depressive Symptomatology: Machine Learning Analysis", journal="JMIR Ment Health", year="2023", month="Oct", day="19", volume="10", pages="e48444", keywords="Patient Health Questionnaire-2", keywords="PHQ-2", keywords="Patient Health Questionnaire-9", keywords="PHQ-9 items", keywords="depressive symptomatology", keywords="ultrabrief questionnaires", keywords="prescreening", keywords="machine learning", keywords="cardinal symptoms", keywords="low energy", keywords="psychomotor dysfunction", keywords="depressed mood", abstract="Background: Anhedonia and depressed mood are considered the cardinal symptoms of major depressive disorder. These are the first 2 items of the Patient Health Questionnaire (PHQ)--9 and comprise the ultrabrief PHQ-2 used for prescreening depressive symptomatology. The prescreening performance of alternative PHQ-9 item pairings is rarely compared with that of the PHQ-2. 
Objective: This study aims to use machine learning (ML) with the PHQ-9 items to identify and validate the most predictive 2-item depressive symptomatology ultrabrief questionnaire and to test the generalizability of the best pairings found on the primary data set, with 6 external data sets from different populations to validate their use as prescreening instruments. Methods: All 36 possible PHQ-9 item pairings (each yielding scores of 0-6) were investigated using ML-based methods with logistic regression models. Their performances were evaluated based on the classification of depressive symptomatology, defined as PHQ-9 scores $\geq$10. This gave each pairing an equal opportunity and avoided any bias in item pairing selection. Results: The ML-based PHQ-9 items 2 and 4 (phq2\&4), the depressed mood and low-energy item pairing, and PHQ-9 items 2 and 8 (phq2\&8), the depressed mood and psychomotor retardation or agitation item pairing, were found to be the best on the primary data set training split. They generalized well on the primary data set test split with area under the curves (AUCs) of 0.954 and 0.946, respectively, compared with an AUC of 0.942 for the PHQ-2. The phq2\&4 had a higher AUC than the PHQ-2 on all 6 external data sets, and the phq2\&8 had a higher AUC than the PHQ-2 on 3 data sets. The phq2\&4 had the highest Youden index (an unweighted average of sensitivity and specificity) on 2 external data sets, and the phq2\&8 had the highest Youden index on another 2. The PHQ-2$\geq$2 cutoff also had the highest Youden index on 2 external data sets, joint highest with the phq2\&4 on 1, but its performance fluctuated the most. The PHQ-2$\geq$3 cutoff had the highest Youden index on 1 external data set. The sensitivity and specificity achieved by the phq2\&4 and phq2\&8 were more evenly balanced than the PHQ-2$\geq$2 and $\geq$3 cutoffs. Conclusions: The PHQ-2 did not prove to be a more effective prescreening instrument when compared with other PHQ-9 item pairings. 
Evaluating all item pairings showed that, compared with alternative partner items, the anhedonia item underperformed alongside the depressed mood item. This suggests that the inclusion of anhedonia as a core symptom of depression and its presence in ultrabrief questionnaires may be incompatible with the empirical evidence. The use of the PHQ-2 to prescreen for depressive symptomatology could result in a greater number of misclassifications than alternative item pairings. ", doi="10.2196/48444", url="https://mental.jmir.org/2023/1/e48444", url="http://www.ncbi.nlm.nih.gov/pubmed/37856186" } @Article{info:doi/10.2196/48152, author="Beltzer, L. Miranda and Meyerhoff, Jonah and Popowski, A. Sarah and Mohr, C. David and Kornfield, Rachel", title="Mental Health Self-Tracking Preferences of Young Adults With Depression and Anxiety Not Engaged in Treatment: Qualitative Analysis", journal="JMIR Form Res", year="2023", month="Oct", day="6", volume="7", pages="e48152", keywords="self-tracking", keywords="self-monitoring", keywords="self-help", keywords="depression", keywords="anxiety", keywords="young adults", keywords="mHealth", keywords="technology", keywords="qualitative analysis", keywords="focus group", keywords="personal informatics", keywords="mood", keywords="thematic analysis", abstract="Background: Despite the high prevalence of anxiety and depression among young adults, many do not seek formal treatment. Some may turn to digital mental health tools for support instead, including to self-track moods, behaviors, and other variables related to mental health. Researchers have sought to understand processes and motivations involved in self-tracking, but few have considered the specific needs and preferences of young adults who are not engaged in treatment and who seek to use self-tracking to support mental health. 
Objective: This study seeks to assess the types of experiences young adults not engaged in treatment have had with digital self-tracking for mood and other mental health data and to assess how young adults not seeking treatment want to engage in self-tracking to support their mental health. Methods: We conducted 2 online asynchronous discussion groups with 50 young adults aged 18 years to 25 years who were not engaged in treatment. Participants were recruited after indicating moderate to severe symptoms of depression or anxiety on screening surveys hosted on the website of Mental Health America. Participants who enrolled in the study responded anonymously to discussion prompts on a message board, as well as to each other's responses, and 3 coders performed a thematic analysis of their responses. Results: Participants had mixed experiences with self-tracking in the past, including disliking when tracking highlighted unwanted behaviors and discontinuing tracking for a variety of reasons. They had more positive past experiences tracking behaviors and tasks they wanted to increase, using open-ended journaling, and with gamified elements to increase motivation. Participants highlighted several design considerations they wanted self-tracking tools to address, including building self-understanding; organization, reminders, and structure; and simplifying the self-tracking experience. Participants wanted self-tracking to help them identify their feelings and how their feelings related to other variables like sleep, exercise, and events in their lives. Participants also highlighted self-tracking as useful for motivating and supporting basic activities and tasks of daily living during periods of feeling overwhelmed or low mood and providing a sense of accomplishment and stability. Although self-tracking can be burdensome, participants were interested and provided suggestions for simplifying the process. 
Conclusions: These young adults not engaged in treatment reported interest in using self-tracking to build self-understanding as a goal in and of itself or as a first step in contemplating and preparing for behavior change or treatment-seeking. Alexithymia, amotivation, and feeling overwhelmed may serve both as barriers to self-tracking and opportunities for self-tracking to help. ", doi="10.2196/48152", url="https://formative.jmir.org/2023/1/e48152", url="http://www.ncbi.nlm.nih.gov/pubmed/37801349" } @Article{info:doi/10.2196/48425, author="Kim, Hyeonseong and Jeong, Seohyun and Hwang, Inae and Sung, Kiyoung and Moon, Woori and Shin, Min-Sup", title="Validation of a Brief Internet-Based Self-Report Measure of Maladaptive Personality and Interpersonal Schema: Confirmatory Factor Analysis", journal="Interact J Med Res", year="2023", month="Sep", day="29", volume="12", pages="e48425", keywords="maladaptive schema", keywords="measure of schema", keywords="self-report measure", keywords="internet-based measure", keywords="digital mental health care", keywords="interpersonal schema", abstract="Background: Existing digital mental health interventions mainly focus on the symptoms of specific mental disorders, but do not focus on Maladaptive Personalities and Interpersonal Schemas (MPISs). As an initial step toward considering personalities and schemas in intervention programs, there is a need for the development of tools for measuring core personality traits and interpersonal schemas known to cause psychological discomfort among potential users of digital mental health interventions. Thus, the MPIS was developed. Objective: The objectives of this study are to validate the MPIS by comparing 2 models of the MPIS factor structure and to understand the characteristics of the MPIS by assessing its correlations with other measures. 
Methods: Data were collected from 234 participants who were using web-based community sites in South Korea, including university students, graduate students, working professionals, and homemakers. All the data were gathered through web-based surveys. Confirmatory factor analysis was used to compare a single-factor model with a 5-factor model. Reliability and correlation analyses with other scales were performed. Results: The results of confirmatory factor analysis indicated that the 5-factor model ($\chi^2_{550}$=1278.1; Tucker-Lewis index=0.80; comparative fit index=0.81; and Root Mean Square Error of Approximation=0.07) was more suitable than the single-factor model ($\chi^2_{560}$=2341.5; Tucker-Lewis index=0.52; comparative fit index=0.54; and Root Mean Square Error of Approximation=0.11) for measuring maladaptive personality traits and interpersonal relationship patterns. The internal consistency of each factor of the MPIS was good (Cronbach $\alpha$=.71-.88), and the correlations with existing measures were statistically significant. The MPIS is a validated 35-item tool for measuring 5 essential personality traits and interpersonal schemas in adults aged 18-39 years. Conclusions: This study introduced the MPIS, a concise and effective questionnaire capable of measuring maladaptive personality traits and interpersonal relationship schemas. Through analysis, the MPIS was shown to reliably assess these psychological constructs and validate them. Its web-based accessibility and reduced item count make it a valuable tool for mental health assessment. Future applications include its integration into digital mental health care services, allowing easy web-based administration and aiding in the classification of psychological therapy programs based on the obtained results. 
Trial Registration: ClinicalTrials.gov NCT05952063; https://www.clinicaltrials.gov/study/NCT05952063 ", doi="10.2196/48425", url="https://www.i-jmr.org/2023/1/e48425", url="http://www.ncbi.nlm.nih.gov/pubmed/37773606" } @Article{info:doi/10.2196/46675, author="Lynham, Joanne Amy and Jones, R. Ian and Walters, R. James T.", title="Cardiff Online Cognitive Assessment in a National Sample: Cross-Sectional Web-Based Study", journal="J Med Internet Res", year="2023", month="Sep", day="13", volume="25", pages="e46675", keywords="cognition", keywords="digital assessment", keywords="mental health", keywords="mobile phone", keywords="normative data", keywords="web-based", keywords="cognitive assessment", keywords="CONCA", abstract="Background: Psychiatric disorders are associated with cognitive impairment. We have developed a web-based, 9-task cognitive battery to measure the core domains affected in people with psychiatric disorders. To date, this assessment has been used to collect data on a clinical sample of participants with psychiatric disorders. Objective: The aims of this study were (1) to establish a briefer version of the battery (called the Cardiff Online Cognitive Assessment [CONCA]) that can give a valid measure of cognitive ability (``g'') and (2) to collect normative data and demonstrate CONCA's application in a health population sample. Methods: Based on 6 criteria and data from our previous study, we selected 5 out of the original 9 tasks to include in CONCA. These included 3 core tasks that were sufficient to derive a measure of ``g'' and 2 optional tasks. Participants from a web-based national cohort study (HealthWise Wales) were invited to complete CONCA. Completion rates, sample characteristics, performance distributions, and associations between cognitive performance and demographic characteristics and mental health measures were examined. 
Results: A total of 3679 participants completed at least one CONCA task, of which 3135 completed all 3 core CONCA tasks. Performance on CONCA was associated with age (B=--0.05, SE 0.002; P<.001), device (tablet computer: B=--0.26, SE 0.05; P<.001; smartphone: B=--0.46, SE 0.05; P<.001), education (degree: B=1.68, SE 0.14; P<.001), depression symptoms (B=--0.04, SE 0.01; P<.001), and anxiety symptoms (B=--0.04, SE 0.01; P<.001). Conclusions: CONCA provides a valid measure of ``g,'' which can be derived using as few as 3 tasks that take no more than 15 minutes. Performance on CONCA showed associations with demographic characteristics in the expected direction and was associated with current depression and anxiety symptoms. The effect of device on cognitive performance is an important consideration for research using web-based assessments. ", doi="10.2196/46675", url="https://www.jmir.org/2023/1/e46675", url="http://www.ncbi.nlm.nih.gov/pubmed/37703073" } @Article{info:doi/10.2196/45161, author="Chong, K. Min and Hickie, B. Ian and Cross, P. Shane and McKenna, Sarah and Varidel, Mathew and Capon, William and Davenport, A. Tracey and LaMonica, M. Haley and Sawrikar, Vilas and Guastella, Adam and Naismith, L. Sharon and Scott, M. Elizabeth and Iorfino, Frank", title="Digital Application of Clinical Staging to Support Stratification in Youth Mental Health Services: Validity and Reliability Study", journal="JMIR Form Res", year="2023", month="Sep", day="8", volume="7", pages="e45161", keywords="clinical staging", keywords="digital health solution", keywords="online diagnosis", keywords="service transformation", keywords="staged care", keywords="stratified care", keywords="youth mental health", abstract="Background: As the demand for youth mental health care continues to rise, managing wait times and reducing treatment delays are key challenges to delivering timely and quality care. 
Clinical staging is a heuristic model for youth mental health that can stratify care allocation according to individuals' risk of illness progression. The application of staging has been traditionally limited to trained clinicians yet leveraging digital technologies to apply clinical staging could increase the scalability and usability of this model in services. Objective: The aim of this study was to validate a digital algorithm to accurately differentiate young people at lower and higher risk of developing mental disorders. Methods: We conducted a study with a cohort comprising 131 young people, aged between 16 and 25 years, who presented to youth mental health services in Australia between November 2018 and March 2021. Expert psychiatrists independently assigned clinical stages (either stage 1a or stage 1b+), which were then compared to the digital algorithm's allocation based on a multidimensional self-report questionnaire. Results: Of the 131 participants, the mean age was 20.3 (SD 2.4) years, and 72\% (94/131) of them were female. Ninety-one percent of clinical stage ratings were concordant between the digital algorithm and the experts' ratings, with a substantial interrater agreement ($\kappa$=0.67; P<.001). The algorithm demonstrated an accuracy of 91\% (95\% CI 86\%-95\%; P=.03), a sensitivity of 80\%, a specificity of 93\%, and an F1-score of 73\%. Of the concordant ratings, 16 young people were allocated to stage 1a, while 103 were assigned to stage 1b+. Among the 12 discordant cases, the digital algorithm allocated a lower stage (stage 1a) to 8 participants compared to the experts. These individuals had significantly milder symptoms of depression (P<.001) and anxiety (P<.001) compared to those with concordant stage 1b+ ratings. Conclusions: This novel digital algorithm is sufficiently robust to be used as an adjunctive decision support tool to stratify care and assist with demand management in youth mental health services. 
This work could transform care pathways and expedite care allocation for those in the early stages of common anxiety and depressive disorders. Between 11\% and 27\% of young people seeking care may benefit from low-intensity, self-directed, or brief interventions. Findings from this study suggest the possibility of redirecting clinical capacity to focus on individuals in stage 1b+ for further assessment and intervention. ", doi="10.2196/45161", url="https://formative.jmir.org/2023/1/e45161", url="http://www.ncbi.nlm.nih.gov/pubmed/37682588" } @Article{info:doi/10.2196/47315, author="Bilu, Yonatan and Amit, Guy and Sudry, Tamar and Akiva, Pinchas and Avgil Tsadok, Meytal and Zimmerman, R. Deena and Baruch, Ravit and Sadaka, Yair", title="A Developmental Surveillance Score for Quantitative Monitoring of Early Childhood Milestone Attainment: Algorithm Development and Validation", journal="JMIR Public Health Surveill", year="2023", month="Aug", day="18", volume="9", pages="e47315", keywords="child development", keywords="risk scores", keywords="scoring methods", keywords="language delay", keywords="motor skills delay", keywords="developmental", keywords="surveillance", keywords="developmental delays", keywords="developmental milestones", keywords="young children", keywords="intervention", keywords="child", abstract="Background: Developmental surveillance, conducted routinely worldwide, is fundamental for timely identification of children at risk of developmental delays. It is typically executed by assessing age-appropriate milestone attainment and applying clinical judgment during health supervision visits. Unlike developmental screening and evaluation tools, surveillance typically lacks standardized quantitative measures, and consequently, its interpretation is often qualitative and subjective. 
Objective: Herein, we suggested a novel method for aggregating developmental surveillance assessments into a single score that coherently depicts and monitors child development. We described the procedure for calculating the score and demonstrated its ability to effectively capture known population-level associations. Additionally, we showed that the score can be used to describe longitudinal patterns of development that may facilitate tracking and classifying developmental trajectories of children. Methods: We described the Developmental Surveillance Score (DSS), a simple-to-use tool that quantifies the age-dependent severity level of a failure at attaining developmental milestones based on the recently introduced Israeli developmental surveillance program. We evaluated the DSS using a nationwide cohort of >1 million Israeli children from birth to 36 months of age, assessed between July 1, 2014, and September 1, 2021. We measured the score's ability to capture known associations between developmental delays and characteristics of the mother and child. Additionally, we computed series of the DSS in consecutive visits to describe a child's longitudinal development and applied cluster analysis to identify distinct patterns of these developmental trajectories. Results: The analyzed cohort included 1,130,005 children. The evaluation of the DSS on subpopulations of the cohort, stratified by known risk factors of developmental delays, revealed expected relations between developmental delay and characteristics of the child and mother, including demographics and obstetrics-related variables. On average, the score was worse for preterm children compared to full-term children and for male children compared to female children, and it was correspondingly worse for lower levels of maternal education. The trajectories of scores in 6 consecutive visits were available for 294,000 children. 
The clustering of these trajectories revealed 3 main types of developmental patterns that are consistent with clinical experience: children who successfully attain milestones, children who initially tend to fail but improve over time, and children whose failures tend to increase over time. Conclusions: The suggested score is straightforward to compute in its basic form and can be easily implemented as a web-based tool in its more elaborate form. It highlights known and novel relations between developmental delay and characteristics of the mother and child, demonstrating its potential usefulness for surveillance and research. Additionally, it can monitor the developmental trajectory of a child and characterize it. Future work is needed to calibrate the score vis-a-vis other screening tools, validate it worldwide, and integrate it into the clinical workflow of developmental surveillance. ", doi="10.2196/47315", url="https://publichealth.jmir.org/2023/1/e47315", url="http://www.ncbi.nlm.nih.gov/pubmed/37489583" } @Article{info:doi/10.2196/28848, author="Ahmed, Sabbir Md and Ahmed, Nova", title="A Fast and Minimal System to Identify Depression Using Smartphones: Explainable Machine Learning--Based Approach", journal="JMIR Form Res", year="2023", month="Aug", day="10", volume="7", pages="e28848", keywords="smartphone", keywords="depression", keywords="explainable machine learning", keywords="low-resource settings", keywords="real-time system", keywords="students", abstract="Background: Existing robust, pervasive device-based systems developed in recent years to detect depression require data collected over a long period and may not be effective in cases where early detection is crucial. Additionally, due to the requirement of running systems in the background for prolonged periods, existing systems can be resource inefficient. As a result, these systems can be infeasible in low-resource settings. 
Objective: Our main objective was to develop a minimalistic system to identify depression using data retrieved in the fastest possible time. Another objective was to explain the machine learning (ML) models that were best for identifying depression. Methods: We developed a fast tool that retrieves the past 7 days' app usage data in 1 second (mean 0.31, SD 1.10 seconds). A total of 100 students from Bangladesh participated in our study, and our tool collected their app usage data and responses to the Patient Health Questionnaire-9. To identify depressed and nondepressed students, we developed a diverse set of ML models: linear, tree-based, and neural network--based models. We selected important features using the stable approach, along with 3 main types of feature selection (FS) approaches: filter, wrapper, and embedded methods. We developed and validated the models using the nested cross-validation method. Additionally, we explained the best ML models through the Shapley additive explanations (SHAP) method. Results: Leveraging only the app usage data retrieved in 1 second, our light gradient boosting machine model used the important features selected by the stable FS approach and correctly identified 82.4\% (n=42) of depressed students (precision=75\%, F1-score=78.5\%). Moreover, after comprehensive exploration, we presented a parsimonious stacking model where around 5 features selected by the all-relevant FS approach Boruta were used in each iteration of validation and showed a maximum precision of 77.4\% (balanced accuracy=77.9\%). Feature importance analysis suggested app usage behavioral markers containing diurnal usage patterns as being more important than aggregated data-based markers. In addition, a SHAP analysis of our best models presented behavioral markers that were related to depression. 
For instance, students who were not depressed spent more time on education apps on weekdays, whereas those who were depressed used a higher number of photo and video apps and also had a higher deviation in using photo and video apps over the morning, afternoon, evening, and night time periods of the weekend. Conclusions: Due to our system's fast and minimalistic nature, it may make a worthwhile contribution to identifying depression in underdeveloped and developing regions. In addition, our detailed discussion about the implication of our findings can facilitate the development of less resource-intensive systems to better understand students who are depressed and take steps for intervention. ", doi="10.2196/28848", url="https://formative.jmir.org/2023/1/e28848", url="http://www.ncbi.nlm.nih.gov/pubmed/37561568" } @Article{info:doi/10.2196/48709, author="Virk, Punit and Arora, Ravia and Burt, Heather and Finnamore, Caitlin and Gadermann, Anne and Barbic, Skye and Doan, Quynh", title="Evaluating the Psychometric Properties and Clinical Utility of a Digital Psychosocial Self-Screening Tool (HEARTSMAP-U) for Postsecondary Students: Prospective Cohort Study", journal="JMIR Ment Health", year="2023", month="Aug", day="9", volume="10", pages="e48709", keywords="mental health", keywords="screening", keywords="validity", keywords="postsecondary students", keywords="clinical utility", abstract="Background: Existing screening tools for mental health issues among postsecondary students have several challenges, including a lack of standardization and codevelopment by students. HEARTSMAP-U was adapted to address these issues. Objective: This study aimed to evaluate the suitability of HEARTSMAP-U as a self-screening tool for psychosocial issues among postsecondary students by evaluating its validity evidence and clinical utility. 
Methods: A prospective cohort study was conducted with University of British Columbia Vancouver students to evaluate HEARTSMAP-U's predictive validity and convergent validity. Participating students completed baseline and 3-month follow-up assessments via HEARTSMAP-U and a clinician-administered interview. Results: In a diverse student sample (n=100), HEARTSMAP-U demonstrated high sensitivity (95\%-100\%) in identifying any psychiatric concerns that were flagged by a research clinician, with lower specificity (21\%-25\%). Strong convergent validity (r=0.54-0.68) was demonstrated when relevant domains and sections of HEARTSMAP-U were compared with those of other conceptually similar instruments. Conclusions: This preliminary evaluation suggests that HEARTSMAP-U may be suitable for screening in the postsecondary educational setting. However, a larger-scale evaluation is necessary to confirm and expand on these findings. ", doi="10.2196/48709", url="https://mental.jmir.org/2023/1/e48709", url="http://www.ncbi.nlm.nih.gov/pubmed/37556180" } @Article{info:doi/10.2196/43271, author="Spadaro, Benedetta and Martin-Key, A. Nayra and Funnell, Erin and Ben{\'a}{\v c}ek, Ji{\v r}{\'i} and Bahn, Sabine", title="Opportunities for the Implementation of a Digital Mental Health Assessment Tool in the United Kingdom: Exploratory Survey Study", journal="JMIR Form Res", year="2023", month="Aug", day="7", volume="7", pages="e43271", keywords="assessment", keywords="digital mental health", keywords="development", keywords="implementation", keywords="mental health", keywords="provision", keywords="support", keywords="mobile phone", abstract="Background: Every year, one-fourth of the people in the United Kingdom experience diagnosable mental health concerns, yet only a proportion receive a timely diagnosis and treatment. With novel developments in digital technologies, the potential to increase access to mental health assessments and triage is promising. 
Objective: This study aimed to investigate the current state of mental health provision in the United Kingdom and understand the utility of, and interest in, digital mental health technologies. Methods: A web-based survey was generated using Qualtrics XM. Participants were recruited via social media. Data were explored using descriptive statistics. Results: The majority of the respondents (555/618, 89.8\%) had discussed their mental health with a general practitioner. More than three-fourths (503/618, 81.4\%) of the respondents had been diagnosed with a mental health disorder, with the most common diagnoses being depression and generalized anxiety disorder. Diagnostic waiting times from first contact with a health care professional varied by diagnosis. Neurodevelopmental disorders (30/56, 54\%), bipolar disorder (25/52, 48\%), and personality disorders (48/101, 47.5\%) had the longest waiting times, with almost half (103/209, 49.3\%) of these diagnoses taking >6 months. Participants stated that waiting times resulted in symptoms worsening (262/353, 74.2\%), lower quality of life (166/353, 47\%), and the necessity to seek emergency care (109/353, 30.9\%). Of the 618 participants, 386 (62.5\%) stated that they felt that their mental health symptoms were not always taken seriously by their health care provider and 297 (48.1\%) were not given any psychoeducational information. The majority of the respondents (416/595, 77.5\%) did not have the chance to discuss mental health support and treatment options. Critically, 16.1\% (96/595) did not find any treatment or support provided at all helpful, with 63\% (48/76) having discontinued treatment with no effective alternatives. Furthermore, 88.3\% (545/617) of the respondents had sought help on the web regarding mental health symptoms, and 44.4\% (272/612) had used a web application or smartphone app for their mental health. 
Psychoeducation (364/596, 61.1\%), referral to a health care professional (332/596, 55.7\%), and symptom monitoring (314/596, 52.7\%) were the most desired app features. Only 6.8\% (40/590) of the participants said that they would not be interested in using a mental health assessment app. Respondents were the most interested to receive an overall severity score of their mental health symptoms (441/546, 80.8\%) and an indication of whether they should seek mental health support (454/546, 83.2\%). Conclusions: Key gaps in current UK mental health care provision are highlighted. Assessment and treatment waiting times together with a lack of information regarding symptoms and treatment options translated into poor care experiences. The participants' responses provide proof-of-concept support for the development of a digital mental health assessment app and valuable recommendations regarding desirable app features. ", doi="10.2196/43271", url="https://formative.jmir.org/2023/1/e43271", url="http://www.ncbi.nlm.nih.gov/pubmed/37549003" } @Article{info:doi/10.2196/42017, author="Zolnowski-Kolp, Victoria and Um Din, Nathavy and Havreng-Th{\'e}ry, Charlotte and Pariel, Sylvie and Veyron, Jacques-Henri and Lafuente-Lafuente, Carmelo and Belmin, Joel", title="Assessment of Frailty by the French Version of the Vulnerable Elders Survey-13 on Digital Tablet: Validation Study", journal="J Med Internet Res", year="2023", month="Aug", day="2", volume="25", pages="e42017", keywords="frailty", keywords="Vulnerable Elders Survey-13 (VES-13)", keywords="elderly", keywords="older people", keywords="family caregivers", keywords="French version", keywords="electronic assessment", keywords="digital technology", keywords="digital health", keywords="eHealth", keywords="tablet", keywords="validity", abstract="Background: Frailty assessment is a major issue in geriatric medicine. 
The Vulnerable Elders Survey-13 (VES-13) is a simple and practical tool that identifies frailty through a 13-item questionnaire completed by older adults or their family caregivers by self-administration (pencil and paper) or by telephone interview. The VES-13 provides a 10-point score that is also a recognized mortality predictor. Objective: This study aims to design an electronic version of the Echelle de Vuln{\'e}rabilit{\'e} des Ain{\'e}s-13, the French version of the VES-13 (eEVA-13) for use on a digital tablet and validate it. Methods: The scale was implemented as a web App in 3 different screens and used on an Android tablet (14.0 {\texttimes} 25.6 cm). Participants were patients attending the outpatient clinic of a French geriatric hospital or hospitalized in a rehabilitation ward and family caregivers of geriatric patients. They completed the scale twice, once by a reference method (self-administered questionnaire or telephone interview) and once by eEVA-13 using the digital tablet. Agreement for diagnosis of frailty was assessed with the $\kappa$ coefficient, and scores were compared by Bland and Altman plots and interclass correlation coefficients. User experience was assessed by a self-administered questionnaire. Results: In total, 86 participants, including 40 patients and 46 family caregivers, participated in the study. All family caregivers had previously used digital devices, while 13 (32.5\%) and 10 (25\%) patients had no or infrequent use of them previously. We observed no failure to complete the eEVA-13, and 70\% of patients (28/40) and no family caregivers needed support to complete the eEVA-13. The agreement between the eEVA-13 and the reference method for the diagnosis of frailty was excellent ($\kappa$=0.92) with agreement in 83 cases and disagreement in 3 cases. The mean difference between the scores provided by the 2 scales was 0.081 (95\% CI $-$1.263 to 1.426). 
Bland and Altman plots showed a high level of agreement between the eEVA-13 and the reference methods and interclass correlation coefficient value was 0.997 (95\% CI 0.994-0.998) for the paper and tablet group and 0.977 (95\% CI 0.957-0.988) for the phone and tablet groups. The tablet assessment was found to be easy to use by 77.5\% (31/40) of patients and by 96\% (44/46) of caregivers. Finally, 85\% (39/46) of family caregivers and 50\% (20/40) of patients preferred the eEVA-13 to the original version. Conclusions: The eEVA-13 is an appropriate digital tool for diagnosing frailty and can be used by older adults and their family caregivers. The scores obtained with eEVA-13 are highly correlated with those obtained with the original version. The use of health questionnaires on digital tablets is feasible in frail and very old patients, although some patients may need help to use them. ", doi="10.2196/42017", url="https://www.jmir.org/2023/1/e42017", url="http://www.ncbi.nlm.nih.gov/pubmed/37531175" } @Article{info:doi/10.2196/46165, author="Lee, Yun Dong and Choi, Byungjin and Kim, Chungsoo and Fridgeirsson, Egill and Reps, Jenna and Kim, Myoungsuk and Kim, Jihyeong and Jang, Jae-Won and Rhee, Youl Sang and Seo, Won-Woo and Lee, Seunghoon and Son, Joon Sang and Park, Woong Rae", title="Privacy-Preserving Federated Model Predicting Bipolar Transition in Patients With Depression: Prediction Model Development Study", journal="J Med Internet Res", year="2023", month="Jul", day="20", volume="25", pages="e46165", keywords="federated learning", keywords="depression", keywords="bipolar disorder", keywords="data standardization", keywords="differential privacy", abstract="Background: Mood disorder has emerged as a serious concern for public health; in particular, bipolar disorder has a less favorable prognosis than depression. Although prompt recognition of depression conversion to bipolar disorder is needed, early prediction is challenging due to overlapping symptoms. 
Recently, there have been attempts to develop a prediction model by using federated learning. Federated learning in medical fields is a method for training multi-institutional machine learning models without patient-level data sharing. Objective: This study aims to develop and validate a federated, differentially private multi-institutional bipolar transition prediction model. Methods: This retrospective study enrolled patients diagnosed with the first depressive episode at 5 tertiary hospitals in South Korea. We developed models for predicting bipolar transition by using data from 17,631 patients in 4 institutions. Further, we used data from 4541 patients for external validation from 1 institution. We created standardized pipelines to extract large-scale clinical features from the 4 institutions without any code modification. Moreover, we performed feature selection in a federated environment for computational efficiency and applied differential privacy to gradient updates. Finally, we compared the federated and the 4 local models developed with each hospital's data on internal and external validation data sets. Results: In the internal data set, 279 out of 17,631 patients showed bipolar disorder transition. In the external data set, 39 out of 4541 patients showed bipolar disorder transition. The average performance of the federated model in the internal test (area under the curve [AUC] 0.726) and external validation (AUC 0.719) data sets was higher than that of the other locally developed models (AUC 0.642-0.707 and AUC 0.642-0.699, respectively). In the federated model, classifications were driven by several predictors such as the Charlson index (low scores were associated with bipolar transition, which may be due to younger age), severe depression, anxiolytics, young age, and visiting months (the bipolar transition was associated with seasonality, especially during the spring and summer months). 
Conclusions: We developed and validated a differentially private federated model by using distributed multi-institutional psychiatric data with standardized pipelines in a real-world environment. The federated model performed better than models using local data only. ", doi="10.2196/46165", url="https://www.jmir.org/2023/1/e46165", url="http://www.ncbi.nlm.nih.gov/pubmed/37471130" } @Article{info:doi/10.2196/45572, author="Frank, C. Adam and Li, Ruibei and Peterson, S. Bradley and Narayanan, S. Shrikanth", title="Wearable and Mobile Technologies for the Evaluation and Treatment of Obsessive-Compulsive Disorder: Scoping Review", journal="JMIR Ment Health", year="2023", month="Jul", day="18", volume="10", pages="e45572", keywords="wearable", keywords="smartphone", keywords="obsessive-compulsive disorder", keywords="OCD", keywords="digital", keywords="phenotype", keywords="biomarker", keywords="mobile phone", abstract="Background: Smartphones and wearable biosensors can continuously and passively measure aspects of behavior and physiology while also collecting data that require user input. These devices can potentially be used to monitor symptom burden; estimate diagnosis and risk for relapse; predict treatment response; and deliver digital interventions in patients with obsessive-compulsive disorder (OCD), a prevalent and disabling psychiatric condition that often follows a chronic and fluctuating course and may uniquely benefit from these technologies. Objective: Given the speed at which mobile and wearable technologies are being developed and implemented in clinical settings, a continual reappraisal of this field is needed. In this scoping review, we map the literature on the use of wearable devices and smartphone-based devices or apps in the assessment, monitoring, or treatment of OCD. 
Methods: In July 2022 and April 2023, we conducted an initial search and an updated search, respectively, of multiple databases, including PubMed, Embase, APA PsycINFO, and Web of Science, with no restriction on publication period, using the following search strategy: (``OCD'' OR ``obsessive'' OR ``obsessive-compulsive'') AND (``smartphone'' OR ``phone'' OR ``wearable'' OR ``sensing'' OR ``biofeedback'' OR ``neurofeedback'' OR ``neuro feedback'' OR ``digital'' OR ``phenotyping'' OR ``mobile'' OR ``heart rate variability'' OR ``actigraphy'' OR ``actimetry'' OR ``biosignals'' OR ``biomarker'' OR ``signals'' OR ``mobile health''). Results: We analyzed 2748 articles, reviewed the full text of 77 articles, and extracted data from the 25 articles included in this review. We divided our review into the following three parts: studies without digital or mobile intervention and with passive data collection, studies without digital or mobile intervention and with active or mixed data collection, and studies with a digital or mobile intervention. Conclusions: Use of mobile and wearable technologies for OCD has developed primarily in the past 15 years, with an increasing pace of related publications. Passive measures from actigraphy generally match subjective reports. Ecological momentary assessment is well tolerated for the naturalistic assessment of symptoms, may capture novel OCD symptoms, and may also document lower symptom burden than retrospective recall. Digital or mobile treatments are diverse; however, they generally provide some improvement in OCD symptom burden. Finally, ongoing work is needed for a safe and trusted uptake of technology by patients and providers. 
", doi="10.2196/45572", url="https://mental.jmir.org/2023/1/e45572", url="http://www.ncbi.nlm.nih.gov/pubmed/37463010" } @Article{info:doi/10.2196/42637, author="Zhang, Wei and Zheng, Xiaoran and Tang, Zeshen and Wang, Haoran and Li, Renren and Xie, Zengmai and Yan, Jiaxin and Zhang, Xiaochen and Yu, Qing and Wang, Fei and Li, Yunxia", title="Combination of Paper and Electronic Trail Making Tests for Automatic Analysis of Cognitive Impairment: Development and Validation Study", journal="J Med Internet Res", year="2023", month="Jun", day="9", volume="25", pages="e42637", keywords="cognition impairment", keywords="Trail Making Test", keywords="vector quantization", keywords="screening", keywords="mixed mode", keywords="paper and electronic devices", abstract="Background: Computer-aided detection, used in the screening and diagnosing of cognitive impairment, provides an objective, valid, and convenient assessment. Particularly, digital sensor technology is a promising detection method. Objective: This study aimed to develop and validate a novel Trail Making Test (TMT) using a combination of paper and electronic devices. Methods: This study included community-dwelling older adult individuals (n=297), who were classified into (1) cognitively healthy controls (HC; n=100 participants), (2) participants diagnosed with mild cognitive impairment (MCI; n=98 participants), and (3) participants with Alzheimer disease (AD; n=99 participants). An electromagnetic tablet was used to record each participant's hand-drawn stroke. A sheet of A4 paper was placed on top of the tablet to maintain the traditional interaction style for participants who were not familiar or comfortable with electronic devices (such as touchscreens). In this way, all participants were instructed to perform the TMT-square and circle. 
Furthermore, we developed an efficient and interpretable cognitive impairment--screening model to automatically analyze cognitive impairment levels that were dependent on demographic characteristics and time-, pressure-, jerk-, and template-related features. Among these features, novel template-based features were based on a vector quantization algorithm. First, the model identified a candidate trajectory as the standard answer (template) from the HC group. The distance between the recorded trajectories and reference was computed as an important evaluation index. To verify the effectiveness of our method, we compared the performance of a well-trained machine learning model using the extracted evaluation index with conventional demographic characteristics and time-related features. The well-trained model was validated using follow-up data (HC group: n=38; MCI group: n=32; and AD group: n=22). Results: We compared 5 candidate machine learning methods and selected random forest as the ideal model with the best performance (accuracy: 0.726 for HC vs MCI, 0.929 for HC vs AD, and 0.815 for AD vs MCI). Meanwhile, the well-trained classifier achieved better performance than the conventional assessment method, with high stability and accuracy of the follow-up data. Conclusions: The study demonstrated that a model combining both paper and electronic TMTs increases the accuracy of evaluating participants' cognitive impairment compared to conventional paper-based feature assessment. 
", doi="10.2196/42637", url="https://www.jmir.org/2023/1/e42637", url="http://www.ncbi.nlm.nih.gov/pubmed/37294606" } @Article{info:doi/10.2196/43385, author="Klein, Britt and Nguyen, Huy and McLaren, Suzanne and Andrews, Brooke and Shandley, Kerrie", title="A Fully Automated Self-help Biopsychosocial Transdiagnostic Digital Intervention to Reduce Anxiety and/or Depression and Improve Emotional Regulation and Well-being: Pre--Follow-up Single-Arm Feasibility Trial", journal="JMIR Form Res", year="2023", month="May", day="30", volume="7", pages="e43385", keywords="anxiety", keywords="depression", keywords="fully automated", keywords="self-help", keywords="digital intervention", keywords="transdiagnostic", keywords="biopsychosocial", keywords="emotion regulation", keywords="allostatic load", keywords="brain plasticity", keywords="positive affect", keywords="comorbidity", abstract="Background: Anxiety disorders and depression are prevalent disorders with high comorbidity, leading to greater chronicity and severity of symptoms. Given the accessibility to treatment issues, more evaluation is needed to assess the potential benefits of fully automated self-help transdiagnostic digital interventions. Innovating beyond the current transdiagnostic one-size-fits-all shared mechanistic approach may also lead to further improvements. Objective: The primary objective of this study was to explore the preliminary effectiveness and acceptability of a new fully automated self-help biopsychosocial transdiagnostic digital intervention (Life Flex) aimed at treating anxiety and/or depression, as well as improving emotional regulation; emotional, social, and psychological well-being; optimism; and health-related quality of life. Methods: This was a real-world pre-during-post-follow-up feasibility trial design evaluation of Life Flex. 
Participants were assessed at the preintervention time point (week 0), during intervention (weeks 3 and 5), at the postintervention time point (week 8), and at 1- and 3-month follow-ups (weeks 12 and 20, respectively). Results: The results provided early support for the Life Flex program in reducing anxiety (Generalized Anxiety Disorder 7), depression (Patient Health Questionnaire 9), psychological distress (Kessler 6), and emotional dysregulation (Difficulties in Emotional Regulation 36) and increasing emotional, social, and psychological well-being (Mental Health Continuum---Short Form); optimism (Revised Life Orientation Test); and health-related quality of life (EQ-5D-3L Utility Index and Health Rating; all false discovery rate [FDR]<.001). Large within-group treatment effect sizes (range |d|=0.82 to 1.33) were found for most variables from pre- to postintervention assessments and at the 1- and 3-month follow-up. The exceptions were medium treatment effect sizes for EQ-5D-3L Utility Index (range Cohen d=$-$0.50 to $-$0.63) and optimism (range Cohen d=$-$0.72 to $-$0.79) and small-to-medium treatment effect size change for EQ-5D-3L Health Rating (range Cohen d=$-$0.34 to $-$0.58). Changes across all outcome variables were generally strongest for participants with preintervention clinical comorbid anxiety and depression presentations (range |d|=0.58 to 2.01) and weakest for participants presenting with nonclinical anxiety and/or depressive symptoms (|d|=0.05 to 0.84). Life Flex was rated as acceptable at the postintervention time point, and participants indicated that they enjoyed the transdiagnostic program and biological, wellness, and lifestyle-focused content and strategies. 
Conclusions: Given the paucity of evidence on fully automated self-help transdiagnostic digital interventions for anxiety and/or depressive symptomatology and general treatment accessibility issues, this study provides preliminary support for biopsychosocial transdiagnostic interventions, such as Life Flex, as a promising future mental health service delivery gap filler. Following large-scale, randomized controlled trials, the potential benefits of fully automated self-help digital health programs, such as Life Flex, could be considerable. Trial Registration: Australian and New Zealand Clinical Trials Registry ACTRN12615000480583; https://www.anzctr.org.au/Trial/Registration/TrialReview.aspx?id=368007 ", doi="10.2196/43385", url="https://formative.jmir.org/2023/1/e43385", url="http://www.ncbi.nlm.nih.gov/pubmed/37252790" } @Article{info:doi/10.2196/45543, author="Oh, Won Jae and Kim, Mi Sun and Lee, Deokjong and Son, Nak-Hoon and Uh, Jinsun and Yoon, Hong Ju and Choi, Yukyung and Lee, San", title="Evaluating the Modified Patient Health Questionnaire-2 and Insomnia Severity Index-2 for Daily Digital Screening of Depression and Insomnia: Validation Study", journal="JMIR Ment Health", year="2023", month="May", day="22", volume="10", pages="e45543", keywords="Patient Health Questionnaire-2", keywords="PHQ-2", keywords="Insomnia Severity Index", keywords="ISI-2", keywords="depression", keywords="insomnia", keywords="mobile health", keywords="mobile phone", abstract="Background: The Patient Health Questionnaire-2 (PHQ-2) and Insomnia Severity Index-2 (ISI-2) are screening assessments that reflect the past 2-week experience of depression and insomnia, respectively. Retrospective assessment has been associated with reduced accuracy owing to recall bias. Objective: This study aimed to increase the reliability of responses by validating the use of the PHQ-2 and ISI-2 for daily screening. 
Methods: A total of 167 outpatients from the psychiatric department at the Yongin Severance Hospital participated in this study, of which 63 (37.7\%) were male and 104 (62.3\%) were female with a mean age of 35.1 (SD 12.1) years. Participants used a mobile app (``Mental Protector'') for 4 weeks and rated their depressive and insomnia symptoms daily on the modified PHQ-2 and ISI-2 scales. The validation assessments were conducted in 2 blocks, each with a fortnight response from the participants. The modified version of the PHQ-2 was evaluated against the conventional scales of the Patient Health Questionnaire-9 and the Korean version of the Center for Epidemiologic Studies Depression Scale--Revised. Results: According to the sensitivity and specificity analyses, an average score of 3.29 on the modified PHQ-2 was considered valid for screening for depressive symptoms. Similarly, the ISI-2 was evaluated against the conventional scale, Insomnia Severity Index, and a mean score of 3.50 was determined to be a valid threshold for insomnia symptoms when rated daily. Conclusions: This study is one of the first to propose a daily digital screening measure for depression and insomnia delivered through a mobile app. The modified PHQ-2 and ISI-2 were strong candidates for daily screening of depression and insomnia, respectively. ", doi="10.2196/45543", url="https://mental.jmir.org/2023/1/e45543", url="http://www.ncbi.nlm.nih.gov/pubmed/37213186" } @Article{info:doi/10.2196/39720, author="Bui, An Truong and Rosenfelt, Scott Cory and Whitlock, Hope Kerri and Leclercq, Mickael and Weber, Savannah and Droit, Arnaud and Wiebe, A. Sandra and Pei, Jacqueline and Bolduc, V. 
Francois", title="Long-term Memory Testing in Children With Typical Development and Neurodevelopmental Disorders: Remote Web-based Image Task Feasibility Study", journal="JMIR Pediatr Parent", year="2023", month="May", day="8", volume="6", pages="e39720", keywords="memory", keywords="neurodevelopmental disorder", keywords="autism spectrum disorder", keywords="intellectual disability", keywords="developmental delay", keywords="hippocampus", keywords="recognition", keywords="paired association learning", keywords="remote testing", keywords="autism", keywords="disorder", keywords="genetics", keywords="developmental", keywords="developmental disorder", keywords="game", keywords="remote", keywords="testing", keywords="diagnose", keywords="diagnosis", abstract="Background: Neurodevelopmental disorders (NDD) cause individuals to have difficulty in learning facts, procedures, or social skills. NDD has been linked to several genes, and several animal models have been used to identify potential therapeutic candidates based on specific learning paradigms for long-term and associative memory. In individuals with NDD, however, such testing has not been used so far, resulting in a gap in translating preclinical results to clinical practice. Objective: We aim to assess if individuals with NDD could be tested for paired association learning and long-term memory deficit, as shown in previous animal models. Methods: We developed an image-based paired association task, which can be performed at different time points using remote web-based testing, and evaluated its feasibility in children with typical development (TD), as well as NDD. We included 2 tasks: object recognition as a simpler task and paired association. Learning was tested immediately after training and also the next day for long-term memory. Results: We found that children aged 5-14 years with TD (n=128) and with NDD of different types (n=57) could complete testing using the Memory Game. 
Children with NDD showed deficits in both recognition and paired association tasks on the first day of learning, in both 5-9--year old (P<.001 and P=.01, respectively) and 10-14--year old groups (P=.001 and P<.001, respectively). The reaction times to stimuli showed no significant difference between individuals with TD or NDD. Children with NDD exhibited a faster 24-hour memory decay for the recognition task than those with TD in the 5-9--year old group. This trend is reversed for the paired association task. Interestingly, we found that children with NDD had their retention for recognition improved and matched with typically developing individuals by 10-14 years of age. The NDD group also showed improved retention deficits in the paired association task at 10-14 years of age compared to the TD group. Conclusions: We showed that web-based learning testing using simple picture association is feasible for children with TD, as well as with NDD. We showed how web-based testing allows us to train children to learn the association between pictures, as shown in immediate test results and those completed 1 day after. This is important as many models for learning deficits in NDD target both short- and long-term memory for therapeutic intervention. We also demonstrated that despite potential confounding factors, such as self-reported diagnosis bias, technical issues, and varied participation, the Memory Game shows significant differences between typically developing children and those with NDD. Future experiments will leverage this potential of web-based testing for larger cohorts and cross-validation with other clinical or preclinical cognitive tasks. 
", doi="10.2196/39720", url="https://pediatrics.jmir.org/2023/1/e39720", url="http://www.ncbi.nlm.nih.gov/pubmed/37155237" } @Article{info:doi/10.2196/36590, author="Wen, Bingyang and Wang, Ning and Subbalakshmi, Koduvayur and Chandramouli, Rajarathnam", title="Revealing the Roles of Part-of-Speech Taggers in Alzheimer Disease Detection: Scientific Discovery Using One-Intervention Causal Explanation", journal="JMIR Form Res", year="2023", month="May", day="2", volume="7", pages="e36590", keywords="explainable machine learning", keywords="Alzheimer disease", keywords="natural language processing", keywords="causal inference", abstract="Background: Recently, rich computational methods that use deep learning or machine learning have been developed using linguistic biomarkers for the diagnosis of early-stage Alzheimer disease (AD). Moreover, some qualitative and quantitative studies have indicated that certain part-of-speech (PoS) features or tags could be good indicators of AD. However, there has not been a systematic attempt to discover the underlying relationships between PoS features and AD. Moreover, there has not been any attempt to quantify the relative importance of PoS features in detecting AD. Objective: Our goal was to disclose the underlying relationship between PoS features and AD, understand whether PoS features are useful in AD diagnosis, and explore which PoS features play a vital role in the diagnosis. Methods: The DementiaBank, containing 1049 transcripts from 208 patients with AD and 243 transcripts from 104 older control individuals, was used. A total of 27 PoS features were extracted from each record. Then, the relationship between AD and each of the PoS features was explored. A transformer-based deep learning model for AD prediction using PoS features was trained. 
Then, a global explainable artificial intelligence method was proposed and used to discover which PoS features were the most important in AD diagnosis using the transformer-based predictor. A global (model-level) feature importance measure was derived as a summary from the local (example-level) feature importance metric, which was obtained using the proposed causally aware counterfactual explanation method. The unique feature of this method is that it considers causal relations among PoS features and can, hence, preclude counterfactuals that are improbable and result in more reliable explanations. Results: The deep learning--based AD predictor achieved an accuracy of 92.2\% and an F1-score of 0.955 when distinguishing patients with AD from healthy controls. The proposed explanation method identified 12 PoS features as being important for distinguishing patients with AD from healthy controls. Of these 12 features, 3 (25\%) have been identified by other researchers in previous works in psychology and natural language processing. The remaining 75\% (9/12) of PoS features have not been previously identified. We believe that this is an interesting finding that can be used in creating tests that might aid in the diagnosis of AD. Note that although our method is focused on PoS features, it should be possible to extend it to more types of features, perhaps even those derived from other biomarkers, such as syntactic features. Conclusions: The high classification accuracy of the proposed deep learner indicates that PoS features are strong clues in AD diagnosis. There are 12 PoS features that are strongly tied to AD, and because language is a noninvasive and potentially cheap method for detecting AD, this work shows some promising directions in this field. ", doi="10.2196/36590", url="https://formative.jmir.org/2023/1/e36590", url="http://www.ncbi.nlm.nih.gov/pubmed/37129944" } @Article{info:doi/10.2196/37269, author="Sudre, Gustavo and Bagi{\'c}, I. Anto and Becker, T. 
James and Ford, P. John", title="An Emerging Screening Method for Interrogating Human Brain Function: Tutorial", journal="JMIR Form Res", year="2023", month="Apr", day="27", volume="7", pages="e37269", keywords="screening", keywords="brain function", keywords="cognition", keywords="magnetoencephalography", keywords="MEG", keywords="neuroimaging", keywords="tutorial", keywords="tool", keywords="cognitive test", keywords="signal", keywords="cognitive function", doi="10.2196/37269", url="https://formative.jmir.org/2023/1/e37269", url="http://www.ncbi.nlm.nih.gov/pubmed/37103988" } @Article{info:doi/10.2196/41005, author="Ghosh, Arka and Cherian, J. Rithwik and Wagle, Surbhit and Sharma, Parth and Kannan, R. Karthikeyan and Bajpai, Alok and Gupta, Nitin", title="An Unguided, Computerized Cognitive Behavioral Therapy Intervention (TreadWill) in a Lower Middle-Income Country: Pragmatic Randomized Controlled Trial", journal="J Med Internet Res", year="2023", month="Apr", day="26", volume="25", pages="e41005", keywords="computerized cognitive behavioral therapy", keywords="cCBT", keywords="depression", keywords="digital intervention", keywords="mobile phone", abstract="Background: Globally, most individuals who are susceptible to depression do not receive adequate or timely treatment. Unguided computerized cognitive behavioral therapy (cCBT) has the potential to bridge this treatment gap. However, the real-world effectiveness of unguided cCBT interventions, particularly in low- and middle-income countries (LMICs), remains inconclusive. Objective: In this study, we aimed to report the design and development of a new unguided cCBT--based multicomponent intervention, TreadWill, and its pragmatic evaluation. TreadWill was designed to be fully automated, engaging, easy to use, and accessible to LMICs. 
Methods: To evaluate the effectiveness of TreadWill and the engagement level, we performed a double-blind, fully remote, and randomized controlled trial with 598 participants in India and analyzed the data using a completer's analysis. Results: The users who completed at least half of the modules in TreadWill showed significant reduction in depression-related (P=.04) and anxiety-related (P=.02) symptoms compared with the waitlist control. Compared with a plain-text version with the same therapeutic content, the full-featured version of TreadWill showed significantly higher engagement (P=.01). Conclusions: Our study provides a new resource and evidence for the use of unguided cCBT as a scalable intervention in LMICs. Trial Registration: ClinicalTrials.gov NCT03445598; https://clinicaltrials.gov/ct2/show/NCT03445598 ", doi="10.2196/41005", url="https://www.jmir.org/2023/1/e41005", url="http://www.ncbi.nlm.nih.gov/pubmed/37099376" } @Article{info:doi/10.2196/41712, author="Pike, Kerryn and Moller, I. Carl and Bryant, Christina and Farrow, Maree and Dao, P. Duy and Ellis, A. Kathryn", title="Examination of the Feasibility, Acceptability, and Efficacy of the Online Personalised Training in Memory Strategies for Everyday Program for Older Adults: Single-Arm Pre-Post Trial", journal="J Med Internet Res", year="2023", month="Apr", day="20", volume="25", pages="e41712", keywords="cognition", keywords="learning", keywords="internet-based intervention", keywords="social support", keywords="subjective cognitive decline", keywords="mobile phone", abstract="Background: Memory strategy training for older adults helps maintain and improve cognitive health but is traditionally offered face-to-face, which is resource intensive, limits accessibility, and is challenging during a pandemic. Web-based interventions, such as the Online Personalised Training in Memory Strategies for Everyday (OPTIMiSE) program, may overcome such barriers. 
Objective: We report on OPTIMiSE's feasibility, acceptability, and efficacy. Methods: Australians aged $\geq$60 years reporting subjective cognitive decline participated in this single-arm pre-post web-based intervention. OPTIMiSE is a 6-module web-based program offered over 8-weeks with a 3-month booster. It has a problem-solving approach to memory issues, focusing on psychoeducation about memory and aging, knowledge and practice of compensatory memory strategies, and personalized content related to individual priorities. We examined the feasibility (recruitment, attrition, and data collection), acceptability (recommendation to others, suggestions for improvement, and withdrawal reasons), and efficacy (change in goal satisfaction, strategy knowledge and use, self-reported memory, memory satisfaction and knowledge, and mood; thematic content analysis of the most significant change; and the application of knowledge and strategies in daily life) of OPTIMiSE. Results: OPTIMiSE was feasible, demonstrated by strong interest (633 individuals screened), a satisfactory level of attrition (158/312, 50.6\%), and minimal missing data from those completing the intervention. It was acceptable, with 97.4\% (150/154) of participants agreeing they would recommend OPTIMiSE, the main suggestion for improvement being more time to complete modules, and withdrawal reasons similar to those in in-person interventions. 
OPTIMiSE was also efficacious, with linear mixed-effects analyses revealing improvements, of moderate to large effect sizes, across all primary outcomes (all P<.001): memory goal satisfaction (Cohen d after course=1.24; Cohen d at 3-month booster=1.64), strategy knowledge (Cohen d after course=0.67; Cohen d at 3-month booster=0.72) and use (Cohen d after course=0.79; Cohen d at 3-month booster=0.90), self-reported memory (Cohen d after course=0.80; Cohen d at 3-month booster=0.83), memory satisfaction (Cohen d after course=1.25; Cohen d at 3-month booster=1.29) and knowledge (Cohen d after course=0.96; Cohen d at 3-month booster=0.26), and mood (Cohen d after course=--0.35; nonsignificant Cohen d at booster). Furthermore, the most significant changes reported by participants (strategy use, improvements in daily life, reduced concern about memory, confidence and self-efficacy, and sharing and shame busting with others) reflected the course objectives and were consistent with themes arising from previous in-person interventions. At the 3-month booster, many participants reported continued implementation of knowledge and strategies in their daily lives. Conclusions: This feasible, acceptable, and efficacious web-based program has the potential to enable access to evidence-based memory interventions for older adults worldwide. Notably, the changes in knowledge, beliefs, and strategy use continued beyond the initial program. This is particularly important for supporting the growing number of older adults living with cognitive concerns. Trial Registration: Australian New Zealand Clinical Trials Registry ACTRN12620000979954; https://tinyurl.com/34cdantv International Registered Report Identifier (IRRID): RR2-10.3233/ADR-200251 ", doi="10.2196/41712", url="https://www.jmir.org/2023/1/e41712", url="http://www.ncbi.nlm.nih.gov/pubmed/37079356" } @Article{info:doi/10.2196/45268, author="Costello, Jeremy and Kaur, Manpreet and Reformat, Z. Marek and Bolduc, V. 
Francois", title="Leveraging Knowledge Graphs and Natural Language Processing for Automated Web Resource Labeling and Knowledge Mobilization in Neurodevelopmental Disorders: Development and Usability Study", journal="J Med Internet Res", year="2023", month="Apr", day="17", volume="25", pages="e45268", keywords="knowledge graph", keywords="natural language processing", keywords="neurodevelopmental disorders", keywords="autism spectrum disorder", keywords="intellectual disability", keywords="attention deficit hyperactivity disorder", keywords="named entity recognition", keywords="topic modeling", keywords="aggregation operator", abstract="Background: Patients and families need to be provided with trusted information more than ever with the abundance of online information. Several organizations aim to build databases that can be searched based on the needs of target groups. One such group is individuals with neurodevelopmental disorders (NDDs) and their families. NDDs affect up to 18\% of the population and have major social and economic impacts. The current limitations in communicating information for individuals with NDDs include the absence of shared terminology and the lack of efficient labeling processes for web resources. Because of these limitations, health professionals, support groups, and families are unable to share, combine, and access resources. Objective: We aimed to develop a natural language--based pipeline to label resources by leveraging standard and free-text vocabularies obtained through text analysis, and then represent those resources as a weighted knowledge graph. Methods: Using a combination of experts and service/organization databases, we created a data set of web resources for NDDs. Text from these websites was scraped and collected into a corpus of textual data on NDDs. This corpus was used to construct a knowledge graph suitable for use by both experts and nonexperts. 
Named entity recognition, topic modeling, document classification, and location detection were used to extract knowledge from the corpus. Results: We developed a resource annotation pipeline using diverse natural language processing algorithms to annotate web resources and stored them in a structured knowledge graph. The graph contained 78,181 annotations obtained from the combination of standard terminologies and a free-text vocabulary obtained using topic modeling. An application of the constructed knowledge graph is a resource search interface using the ordered weighted averaging operator to rank resources based on a user query. Conclusions: We developed an automated labeling pipeline for web resources on NDDs. This work showcases how artificial intelligence--based methods, such as natural language processing and knowledge graphs for information representation, can enhance knowledge extraction and mobilization, and could be used in other fields of medicine. ", doi="10.2196/45268", url="https://www.jmir.org/2023/1/e45268", url="http://www.ncbi.nlm.nih.gov/pubmed/37067865" } @Article{info:doi/10.2196/37847, author="Moreno-Amador, Beatriz and Cervin, Matti and Mart{\'i}nez-Gonz{\'a}lez, Ernesto Agustin and Piqueras, A. 
Jose", title="Sensory Overresponsivity and Symptoms Across the Obsessive-Compulsive Spectrum: Web-Based Longitudinal Observational Study", journal="J Med Internet Res", year="2023", month="Apr", day="13", volume="25", pages="e37847", keywords="sensory symptoms", keywords="sensory overresponsivity", keywords="obsessive-compulsive", keywords="hair-pulling", keywords="skin-picking", keywords="hoarding", keywords="body dysmorphic", keywords="adolescents", keywords="adults", abstract="Background: Sensory overresponsivity (SOR) has emerged as a potential endophenotype in obsessive-compulsive disorder (OCD), but few studies have examined SOR in relation to the major symptom dimensions of OCD and to symptoms across the full obsessive-compulsive (OC) symptom spectrum. Objective: This study had 2 main objectives. First, we examined the psychometric properties of the SOR Scales in a community-based sample of Spanish adolescents and adults. Second, we identified how SOR difficulties are related to symptoms across the full OC spectrum (eg, OC, body dysmorphic, hoarding, skin-picking, and hair-pulling symptoms), including the heterogeneity of OC symptoms. Methods: We translated the SOR Scales into Spanish---a measure that assesses SOR across the 5 sensory modalities---and created a web-based version of the measure. A sample of 1454 adolescents and adults (mean age 23.84, SD 8.46 years) participated in the study, and 388 (26.69\%) participants completed the survey twice (approximately 8 months apart). The survey also contained a web-based measure that assesses symptoms across the full OC spectrum: harm and checking, taboo obsessions, contamination or cleaning, symmetry and ordering, body dysmorphic, hoarding, hair-pulling, and skin-picking symptoms. Results: The psychometric properties of the SOR Scales were excellent, and the test-retest reliability was adequate. All types of SOR were related to all major symptom dimensions of OCD and to all OC spectrum symptoms. 
Conclusions: SOR across the sensory modalities can be validly assessed using a web-based measure. SOR emerged as a pure transdiagnostic phenomenon in relation to symptoms across the OC spectrum, with no specific sensory modality being more strongly related to OC symptoms. SOR can shed much needed light on basic mechanisms that are important for the onset and maintenance of OC spectrum symptoms, and this study shows that large-scale web-based studies can aid in this endeavor. Future studies should examine whether SOR precedes or emerges alongside OC symptoms. ", doi="10.2196/37847", url="https://www.jmir.org/2023/1/e37847", url="http://www.ncbi.nlm.nih.gov/pubmed/37052983" } @Article{info:doi/10.2196/44325, author="Teferra, Gashaw Bazen and Rose, Jonathan", title="Predicting Generalized Anxiety Disorder From Impromptu Speech Transcripts Using Context-Aware Transformer-Based Neural Networks: Model Evaluation Study", journal="JMIR Ment Health", year="2023", month="Mar", day="28", volume="10", pages="e44325", keywords="mental health", keywords="generalized anxiety disorder", keywords="impromptu speech", keywords="linguistic features", keywords="anxiety prediction", keywords="neural networks", keywords="natural language processing", keywords="transformer models", keywords="mobile phone", abstract="Background: The ability to automatically detect anxiety disorders from speech could be useful as a screening tool for an anxiety disorder. Prior studies have shown that individual words in textual transcripts of speech have an association with anxiety severity. Transformer-based neural networks are models that have been recently shown to have powerful predictive capabilities based on the context of more than one input word. Transformers detect linguistic patterns and can be separately trained to make specific predictions based on these patterns. 
Objective: This study aimed to determine whether a transformer-based language model can be used to screen for generalized anxiety disorder from impromptu speech transcripts. Methods: A total of 2000 participants provided an impromptu speech sample in response to a modified version of the Trier Social Stress Test (TSST). They also completed the Generalized Anxiety Disorder 7-item (GAD-7) scale. A transformer-based neural network model (pretrained on large textual corpora) was fine-tuned on the speech transcripts and the GAD-7 to predict whether a participant was above or below a screening threshold of the GAD-7. We reported the area under the receiver operating characteristic curve (AUROC) on the test data and compared the results with a baseline logistic regression model using the Linguistic Inquiry and Word Count (LIWC) features as input. Using the integrated gradient method to determine specific words that strongly affect the predictions, we inferred specific linguistic patterns that influence the predictions. Results: The baseline LIWC-based logistic regression model had an AUROC value of 0.58. The fine-tuned transformer model achieved an AUROC value of 0.64. Specific words that were often implicated in the predictions were also dependent on the context. For example, the first-person singular pronoun ``I'' influenced toward an anxious prediction 88\% of the time and a nonanxious prediction 12\% of the time, depending on the context. Silent pauses in speech, also often implicated in predictions, influenced toward an anxious prediction 20\% of the time and a nonanxious prediction 80\% of the time. Conclusions: There is evidence that a transformer-based neural network model has increased predictive power compared with the single word--based LIWC model. We also showed that the use of specific words in a specific context---a linguistic pattern---is part of the reason for the better prediction. 
This suggests that such transformer-based models could play a useful role in anxiety screening systems. ", doi="10.2196/44325", url="https://mental.jmir.org/2023/1/e44325", url="http://www.ncbi.nlm.nih.gov/pubmed/36976636" } @Article{info:doi/10.2196/39917, author="Banerjee, Agnik and Mutlu, Cezmi Onur and Kline, Aaron and Surabhi, Saimourya and Washington, Peter and Wall, Paul Dennis", title="Training and Profiling a Pediatric Facial Expression Classifier for Children on Mobile Devices: Machine Learning Study", journal="JMIR Form Res", year="2023", month="Mar", day="21", volume="7", pages="e39917", keywords="edge computing", keywords="affective computing", keywords="autism spectrum disorder", keywords="autism", keywords="ASD", keywords="classifier", keywords="classification", keywords="model", keywords="algorithm", keywords="mobile health", keywords="computer vision", keywords="deep learning", keywords="machine learning for health", keywords="pediatrics", keywords="emotion recognition", keywords="mHealth", keywords="diagnostic tool", keywords="digital therapy", keywords="child", keywords="developmental disorder", keywords="smartphone", keywords="image analysis", keywords="machine learning", keywords="Image classification", keywords="neural network", abstract="Background: Implementing automated facial expression recognition on mobile devices could provide an accessible diagnostic and therapeutic tool for those who struggle to recognize facial expressions, including children with developmental behavioral conditions such as autism. Despite recent advances in facial expression classifiers for children, existing models are too computationally expensive for smartphone use. 
Objective: We explored several state-of-the-art facial expression classifiers designed for mobile devices, used posttraining optimization techniques for both classification performance and efficiency on a Motorola Moto G6 phone, evaluated the importance of training our classifiers on children versus adults, and evaluated the models' performance against different ethnic groups. Methods: We collected images from 12 public data sets and used video frames crowdsourced from the GuessWhat app to train our classifiers. All images were annotated for 7 expressions: neutral, fear, happiness, sadness, surprise, anger, and disgust. We tested 3 copies for each of 5 different convolutional neural network architectures: MobileNetV3-Small 1.0x, MobileNetV2 1.0x, EfficientNetB0, MobileNetV3-Large 1.0x, and NASNetMobile. We trained the first copy on images of children, second copy on images of adults, and third copy on all data sets. We evaluated each model against the entire Child Affective Facial Expression (CAFE) set and by ethnicity. We performed weight pruning, weight clustering, and quantize-aware training when possible and profiled each model's performance on the Moto G6. Results: Our best model, a MobileNetV3-Large network pretrained on ImageNet, achieved 65.78\% accuracy and 65.31\% F1-score on the CAFE and a 90-millisecond inference latency on a Moto G6 phone when trained on all data. This accuracy is only 1.12\% lower than the current state of the art for CAFE, a model with 13.91x more parameters that was unable to run on the Moto G6 due to its size, even when fully optimized. When trained solely on children, this model achieved 60.57\% accuracy and 60.29\% F1-score. When trained only on adults, the model received 53.36\% accuracy and 53.10\% F1-score. 
Although the MobileNetV3-Large trained on all data sets achieved nearly a 60\% F1-score across all ethnicities, the data sets for South Asian and African American children achieved lower accuracy (as much as 11.56\%) and F1-score (as much as 11.25\%) than other groups. Conclusions: With specialized design and optimization techniques, facial expression classifiers can become lightweight enough to run on mobile devices and achieve state-of-the-art performance. There is potentially a ``data shift'' phenomenon between facial expressions of children compared with adults; our classifiers performed much better when trained on children. Certain underrepresented ethnic groups (e.g., South Asian and African American) also perform significantly worse than groups such as European Caucasian despite similar data quality. Our models can be integrated into mobile health therapies to help diagnose autism spectrum disorder and provide targeted therapeutic treatment to children. ", doi="10.2196/39917", url="https://formative.jmir.org/2023/1/e39917", url="http://www.ncbi.nlm.nih.gov/pubmed/35962462" } @Article{info:doi/10.2196/36663, author="Campitelli, Anthony and Paulson, Sally and Gills, L. Josh and Jones, D. Megan and Madero, N. Erica and Myers, Jennifer and Glenn, M. Jordan and Gray, Michelle", title="A Novel Digital Digit-Symbol Substitution Test Measuring Processing Speed in Adults At Risk for Alzheimer Disease: Validation Study", journal="JMIR Aging", year="2023", month="Jan", day="27", volume="6", pages="e36663", keywords="Alzheimer disease", keywords="dementia", keywords="processing speed", keywords="digit-symbol substitution", keywords="aging", keywords="cognitive", abstract="Background: Assessing cognitive constructs affected by Alzheimer disease, such as processing speed (PS), is important to screen for potential disease and allow for early detection. 
Digital PS assessments have been developed to provide widespread, efficient cognitive testing, but all have been validated only based on the correlation between test scores. Best statistical practices dictate that concurrent validity should be assessed for agreement or equivalence rather than using correlation alone. Objective: This study aimed to assess the concurrent validity of a novel digital PS assessment against a gold-standard measure of PS. Methods: Adults aged 45-75 years (n=191) participated in this study. Participants completed the novel digital digit-symbol substitution test (DDSST) and the Repeatable Battery for the Assessment of Neuropsychological Status coding test (RBANS-C). The correlation between the test scores was determined using a Pearson product-moment correlation, and a difference in mean test scores between tests was checked for using a 2-tailed dependent samples t test. Data were analyzed for agreement between the 2 tests using Bland-Altman limits of agreement and equivalency using a two one-sided t tests (TOST) approach. Results: A significant moderate, positive correlation was found between DDSST and RBANS-C scores (r=.577; P<.001), and no difference in mean scores was detected between the tests (P=.93). Bias was nearly zero (0.04). Scores between the tests were found to display adequate agreement with 90\% of score differences falling between --22.66 and 22.75 (90\% limits of agreement=--22.91 to 22.99), and the scores were equivalent (P=.049). Conclusions: Analyses indicate that the DDSST is a valid digital assessment of PS. The DDSST appears to be a suitable option for widespread, immediate, and efficient PS testing. 
Trial Registration: ClinicalTrials.gov NCT04559789; https://clinicaltrials.gov/ct2/show/NCT04559789 ", doi="10.2196/36663", url="https://aging.jmir.org/2023/1/e36663", url="http://www.ncbi.nlm.nih.gov/pubmed/36705951" } @Article{info:doi/10.2196/42792, author="Yoshii, Kenta and Kimura, Daiki and Kosugi, Akihiro and Shinkawa, Kaoru and Takase, Toshiro and Kobayashi, Masatomo and Yamada, Yasunori and Nemoto, Miyuki and Watanabe, Ryohei and Ota, Miho and Higashi, Shinji and Nemoto, Kiyotaka and Arai, Tetsuaki and Nishimura, Masafumi", title="Screening of Mild Cognitive Impairment Through Conversations With Humanoid Robots: Exploratory Pilot Study", journal="JMIR Form Res", year="2023", month="Jan", day="13", volume="7", pages="e42792", keywords="mild cognitive impairment", keywords="Alzheimer disease", keywords="neuropsychiatric symptoms", keywords="neuropsychological assessment", keywords="simple screening", keywords="humanoid robot", keywords="robot", keywords="symptoms", keywords="neuropsychological", keywords="monitoring", abstract="Background: The rising number of patients with dementia has become a serious social problem worldwide. To help detect dementia at an early stage, many studies have been conducted to detect signs of cognitive decline by prosodic and acoustic features. However, many of these methods are not suitable for everyday use as they focus on cognitive function or conversational speech during the examinations. In contrast, conversational humanoid robots are expected to be used in the care of older people to help reduce the work of care and monitoring through interaction. Objective: This study focuses on early detection of mild cognitive impairment (MCI) through conversations between patients and humanoid robots without a specific examination, such as neuropsychological examination. Methods: This was an exploratory study involving patients with MCI and cognitively normal (CN) older people. 
We collected the conversation data during neuropsychological examination (Mini-Mental State Examination [MMSE]) and everyday conversation between a humanoid robot and 94 participants (n=47, 50\%, patients with MCI and n=47, 50\%, CN older people). We extracted 17 types of prosodic and acoustic features, such as the duration of response time and jitter, from these conversations. We conducted a statistical significance test for each feature to clarify the speech features that are useful when classifying people into CN people and patients with MCI. Furthermore, we conducted an automatic classification experiment using a support vector machine (SVM) to verify whether it is possible to automatically classify these 2 groups by the features identified in the statistical significance test. Results: We obtained significant differences in 5 (29\%) of 17 types of features obtained from the MMSE conversational speech. The duration of response time, the duration of silent periods, and the proportion of silent periods showed a significant difference (P<.001) and met the reference value r=0.1 (small) of the effect size. Additionally, filler periods (P<.01) and the proportion of fillers (P=.02) showed a significant difference; however, these did not meet the reference value of the effect size. In contrast, we obtained significant differences in 16 (94\%) of 17 types of features obtained from the everyday conversations with the humanoid robot. The duration of response time, the duration of speech periods, jitter (local, relative average perturbation [rap], 5-point period perturbation quotient [ppq5], difference of difference of periods [ddp]), shimmer (local, amplitude perturbation quotient [apq]3, apq5, apq11, average absolute differences between the amplitudes of consecutive periods [dda]), and F0cov (coefficient of variation of the fundamental frequency) showed a significant difference (P<.001). 
In addition, the duration of response time, the duration of silent periods, the filler period, and the proportion of fillers showed significant differences (P<.05). However, only jitter (local) met the reference value r=0.1 (small) of the effect size. In the automatic classification experiment for the classification of participants into CN and MCI groups, the results showed 66.0\% accuracy in the MMSE conversational speech and 68.1\% accuracy in everyday conversations with the humanoid robot. Conclusions: This study shows the possibility of early and simple screening for patients with MCI using prosodic and acoustic features from everyday conversations with a humanoid robot with the same level of accuracy as the MMSE. ", doi="10.2196/42792", url="https://formative.jmir.org/2023/1/e42792", url="http://www.ncbi.nlm.nih.gov/pubmed/36637896" } @Article{info:doi/10.2196/42416, author="Young, Ruth Stephanie and Lattie, Gardiner Emily and Berry, L. Andrew B. and Bui, Lynn and Byrne, Joseph Greg and Yoshino Benavente, Noelani Julia and Bass, Michael and Gershon, C. Richard and Wolf, S. Michael and Nowinski, J. Cindy", title="Remote Cognitive Screening Of Healthy Older Adults for Primary Care With the MyCog Mobile App: Iterative Design and Usability Evaluation", journal="JMIR Form Res", year="2023", month="Jan", day="10", volume="7", pages="e42416", keywords="human-centered design", keywords="mobile health", keywords="mHealth", keywords="usability", keywords="cognitive screening", keywords="older adults", keywords="mobile phone", abstract="Background: Annual cognitive screening in adults aged >65 years can improve early detection of cognitive impairment, yet less than half of all cases are identified in primary care. Time constraints in primary care settings present a major barrier to routine screening. 
A remote cognitive screener completed on a patient's own smartphone before a visit has the potential to save primary care clinics time, encourage broader screening practices, and increase early detection of cognitive decline. Objective: We described the iterative design and proposed the implementation of a remote cognitive screening app, MyCog Mobile, to be completed on a patient's smartphone before an annual wellness visit. The research questions were as follows: What would motivate primary care clinicians and clinic administrators to implement a remote cognitive screening process? How might we design a remote cognitive screener to fit well with existing primary care workflows? What would motivate an older adult patient to complete a cognitive screener on a smartphone before a primary care visit? How might we optimize the user experience of completing a remote cognitive screener on a smartphone for older adults? Methods: To address research questions 1 and 2, we conducted individual interviews with clinicians (n=5) and clinic administrators (n=3). We also collaborated with clinic administrators to create user journey maps of their existing and proposed MyCog Mobile workflows. To address research questions 3 and 4, we conducted individual semistructured interviews with cognitively healthy older adults (n=5) and solicited feedback from a community stakeholder panel (n=11). We also tested and refined high-fidelity prototypes of the MyCog Mobile app with the older adult interview participants, who rated the usability on the Simplified System Usability Scale and After-Scenario Questionnaire. Results: Clinicians and clinic administrators were motivated to adopt a remote cognitive screening process if it saved time in their workflows. Findings from interviews and user journey mapping informed the proposed implementation and core functionality of MyCog Mobile. 
Older adult participants were motivated to complete cognitive screeners to ensure that they were cognitively healthy and saw additional benefits to remote screening, such as saving time during their visit and privacy. Older adults also identified potential challenges to remote smartphone screening, which informed the user experience design of the MyCog Mobile app. The average rating across prototype versions was 91 (SD 5.18) on the Simplified System Usability Scale and 6.13 (SD 8.40) on the After-Scenario Questionnaire, indicating above-average usability. Conclusions: Through an iterative, human-centered design process, we developed a viable remote cognitive screening app and proposed an implementation strategy for primary care settings that was optimized for multiple stakeholders. The next steps include validating the cognitive screener in clinical and healthy populations and piloting the finalized app in a community primary care clinic. ", doi="10.2196/42416", url="https://formative.jmir.org/2023/1/e42416", url="http://www.ncbi.nlm.nih.gov/pubmed/36626223" } @Article{info:doi/10.2196/26665, author="Ha{\ss}denteufel, Kathrin and Lingenfelder, Katrin and Schwarze, E. Cornelia and Feisst, Manuel and Brusniak, Katharina and Matthies, Maria Lina and Goetz, Maren and Wallwiener, Markus and Wallwiener, Stephanie", title="Evaluation of Repeated Web-Based Screening for Predicting Postpartum Depression: Prospective Cohort Study", journal="JMIR Ment Health", year="2021", month="Dec", day="10", volume="8", number="12", pages="e26665", keywords="postpartum depression", keywords="Edinburgh Postnatal Depression Scale", keywords="screening", keywords="pregnancy", keywords="algorithm", abstract="Background: Postpartum depression (PPD) is a severe mental disorder that often results in poor maternal-infant attachment and negatively impacts infant development. 
Universal screening has recently been recommended to identify women at risk, but the optimal screening time during pregnancy has not been defined so far. Thus, web-based technologies with widespread use among women of childbearing age create new opportunities to detect pregnancies with a high risk for adverse mental health outcomes at an early stage. Objective: The aim of this study was to stratify the risk for PPD and to determine the optimal screening time during pregnancy by using a web-based screening tool collecting electronic patient-reported outcomes (ePROs) as the basis for a screening algorithm. Methods: In total, 214 women were repeatedly tested for depressive symptoms 5 times during and 3 times after pregnancy by using the Edinburgh Postnatal Depression Scale (EPDS), accessible on a web-based pregnancy platform, developed by the authors of this study. For each prenatal assessment, the area under the curve (AUC), sensitivity, specificity, and predictive values for PPD were calculated. Multivariate logistic regression analyses were applied to identify further potential predictors, such as age, education, parity, relationship quality, and anxiety, to increase predictive accuracy. Results: Digitally collected data from 214 pregnant women were analyzed. The predictive accuracy of depressive symptoms 3 and 6 months postpartum was reasonable to good regarding the screening in the second (AUC=0.85) and third (AUC=0.75) trimester. The multivariate logistic regression analyses resulted in an excellent AUC of 0.93 at 3 months and a good AUC of 0.87 at 6 months postpartum. Conclusions: The best predictive accuracy for PPD has been shown for screening between the 24th and the 28th gestational week (GW) and seems to be beneficial for identifying women at risk. In combination with the aforementioned predictive factors, the discriminatory power improved, particularly at 3 months postpartum. 
Screening for depression during pregnancy, combined with the women's personal risk profile, can be used as a starting point for developing a digital screening algorithm. Thereby, web-based assessment tools constitute feasible, efficient, and cost-effective approaches. Thus, they seem to be beneficial in detecting high-risk pregnancies in order to improve maternal and infant birth outcomes in the long term. ", doi="10.2196/26665", url="https://mental.jmir.org/2021/12/e26665", url="http://www.ncbi.nlm.nih.gov/pubmed/34890349" } @Article{info:doi/10.2196/30439, author="Smrke, Ur{\v{s}}ka and Mlakar, Izidor and Lin, Simon and Musil, Bojan and Plohl, Nejc", title="Language, Speech, and Facial Expression Features for Artificial Intelligence--Based Detection of Cancer Survivors' Depression: Scoping Meta-Review", journal="JMIR Ment Health", year="2021", month="Dec", day="6", volume="8", number="12", pages="e30439", keywords="artificial intelligence", keywords="cancer", keywords="depression", keywords="facial expression", keywords="language", keywords="oncology", keywords="review", keywords="screening", keywords="speech", keywords="symptom", abstract="Background: Cancer survivors often experience disorders from the depressive spectrum that remain largely unrecognized and overlooked. Even though screening for depression is recognized as essential, several barriers prevent its successful implementation. It is possible that better screening options can be developed. New possibilities have been opening up with advances in artificial intelligence and increasing knowledge on the connection of observable cues and psychological states. Objective: The aim of this scoping meta-review was to identify observable features of depression that can be intercepted using artificial intelligence in order to provide a stepping stone toward better recognition of depression among cancer survivors. Methods: We followed a methodological framework for scoping reviews.
We searched SCOPUS and Web of Science for relevant papers on the topic, and data were extracted from the papers that met inclusion criteria. We used thematic analysis within 3 predefined categories of depression cues (ie, language, speech, and facial expression cues) to analyze the papers. Results: The search yielded 1023 papers, of which 9 met the inclusion criteria. Analysis of their findings resulted in several well-supported cues of depression in language, speech, and facial expression domains, which provides a comprehensive list of observable features that are potentially suited to be intercepted by artificial intelligence for early detection of depression. Conclusions: This review provides a synthesis of behavioral features of depression while translating this knowledge into the context of artificial intelligence--supported screening for depression in cancer survivors. ", doi="10.2196/30439", url="https://mental.jmir.org/2021/12/e30439", url="http://www.ncbi.nlm.nih.gov/pubmed/34874883" } @Article{info:doi/10.2196/30919, author="Yan, Mingli and Yin, Huiru and Meng, Qiuyan and Wang, Shuo and Ding, Yiwen and Li, Guichen and Wang, Chunyan and Chen, Li", title="A Virtual Supermarket Program for the Screening of Mild Cognitive Impairment in Older Adults: Diagnostic Accuracy Study", journal="JMIR Serious Games", year="2021", month="Dec", day="3", volume="9", number="4", pages="e30919", keywords="virtual reality", keywords="mild cognitive impairment", keywords="dementia", keywords="ambient intelligence", keywords="digital health", keywords="elderly population", keywords="aging", abstract="Background: Mild cognitive impairment (MCI) is often a precursor of dementia, and patients with MCI develop dementia at a higher rate than healthy older adults. Early detection of cognitive decline at the MCI stage supports better planning of care and interventions. 
At present, the use of virtual reality (VR) in screening for MCI in older adults is promising, but there is little evidence regarding the use of virtual supermarkets to screen for MCI. Objective: The objectives of this study are to validate a VR game--based test, namely, the Virtual Supermarket Program (VSP), for differentiating patients with MCI and healthy controls and to identify cutoff scores for different age levels. Methods: Subjects were recruited from several nursing homes and communities in Changchun, China. They were divided into a healthy control group (n=64) and an MCI group (n=62). All subjects were administered the VSP and a series of neuropsychological examinations. The study determined the optimal cutoff, discriminating validity, concurrent validity, and retest reliability of the VSP. We used the area under the receiver operating characteristic curve (AUC) to evaluate the discriminating validity and obtain the optimal cutoff values. Pearson correlation analysis and the intraclass correlation coefficient were used to evaluate the concurrent validity and retest reliability, respectively. Results: A cutoff score of 46.4 was optimal for the entire sample, yielding a sensitivity of 85.9\% and specificity of 79.0\% for differentiating individuals with MCI and healthy controls, and the AUC was 0.870 (95\% CI 0.799-0.924). The median index of VSP score was 51.1 (range 42.6-60.0). There was a moderate positive correlation between the VSP total score and Mini-Mental State Examination score (r=0.429, P<.001). There was a strong positive correlation between VSP total score and Montreal Cognitive Assessment score (r=0.645, P<.001). The retest reliability of the VSP was feasible (r=0.588, P=.048). Conclusions: The VSP is interesting and feasible for subjects. It shows high sensitivity and specificity for the identification of MCI in older adults, which makes it a promising screening method. 
The VSP may be generalized to older adults in other countries, although some cultural adaptation may be necessary. Trial Registration: Chinese Clinical Trial Registry ChiCTR2000040074; https://www.chictr.org.cn/showprojen.aspx?proj=64639 ", doi="10.2196/30919", url="https://games.jmir.org/2021/4/e30919", url="http://www.ncbi.nlm.nih.gov/pubmed/34870610" } @Article{info:doi/10.2196/31053, author="van Gils, M. Aniek and Visser, NC Leonie and Hendriksen, MA Heleen and Georges, Jean and Muller, Majon and Bouwman, H. Femke and van der Flier, M. Wiesje and Rhodius-Meester, FM Hanneke", title="Assessing the Views of Professionals, Patients, and Care Partners Concerning the Use of Computer Tools in Memory Clinics: International Survey Study", journal="JMIR Form Res", year="2021", month="Dec", day="3", volume="5", number="12", pages="e31053", keywords="artificial intelligence", keywords="clinical decision support systems", keywords="dementia", keywords="diagnostic testing", keywords="diagnosis", keywords="prognosis", keywords="communication", abstract="Background: Computer tools based on artificial intelligence could aid clinicians in memory clinics in several ways, such as by supporting diagnostic decision-making, web-based cognitive testing, and the communication of diagnosis and prognosis. Objective: This study aims to identify the preferences as well as the main barriers and facilitators related to using computer tools in memory clinics for all end users, that is, clinicians, patients, and care partners. Methods: Between July and October 2020, we sent out invitations to a web-based survey to clinicians using the European Alzheimer's Disease Centers network and the Dutch Memory Clinic network, and 109 clinicians participated (mean age 45 years, SD 10; 53/109, 48.6\% female). A second survey was created for patients and care partners. They were invited via Alzheimer Europe, Alzheimer's Society United Kingdom, Amsterdam Dementia Cohort, and Amsterdam Aging Cohort. 
A total of 50 patients with subjective cognitive decline, mild cognitive impairment, or dementia (mean age 73 years, SD 8; 17/34, 34\% female) and 46 care partners (mean age 65 years, SD 12; 25/54, 54\% female) participated in this survey. Results: Most clinicians reported a willingness to use diagnostic (88/109, 80.7\%) and prognostic (83/109, 76.1\%) computer tools. User-friendliness (71/109, 65.1\%; Likert scale mean 4.5, SD 0.7), and increasing diagnostic accuracy (76/109, 69.7\%; mean 4.3, SD 0.7) were reported as the main factors stimulating the adoption of a tool. Tools should also save time and provide clear information on reliability and validity. Inadequate integration with electronic patient records (46/109, 42.2\%; mean 3.8, SD 1.0) and fear of losing important clinical information (48/109, 44\%; mean 3.7, SD 1.2) were most frequently indicated as barriers. Patients and care partners were equally positive about the use of computer tools by clinicians, both for diagnosis (69/96, 72\%) and prognosis (73/96, 76\%). In addition, most of them thought favorably regarding the possibility of using the tools themselves. Conclusions: This study showed that computer tools in memory clinics are positively valued by most end users. For further development and implementation, it is essential to overcome the technical and practical barriers of a tool while paying utmost attention to its reliability and validity. ", doi="10.2196/31053", url="https://formative.jmir.org/2021/12/e31053", url="http://www.ncbi.nlm.nih.gov/pubmed/34870612" } @Article{info:doi/10.2196/31127, author="Piqueras, A. Jose and Vidal-Arenas, Ver{\'o}nica and Falc{\'o}, Raquel and Moreno-Amador, Beatriz and Marzo, C. Juan and Holcomb, M.
Juliana and Murphy, Michael", title="Short Form of the Pediatric Symptom Checklist-Youth Self-Report (PSC-17-Y): Spanish Validation Study", journal="J Med Internet Res", year="2021", month="Dec", day="1", volume="23", number="12", pages="e31127", keywords="PSC-17-Y", keywords="psychometric properties", keywords="screening", keywords="mental problems", keywords="adolescents", keywords="adolescent health", keywords="adolescent medicine", keywords="psychiatry", keywords="psychology", keywords="psychosocial issues", abstract="Background: The short form, 17-item version of the Pediatric Symptom Checklist-Youth Self-Report (PSC-17-Y) is a validated measure that assesses psychosocial problems overall (OVR) and in 3 major psychopathological domains (internalizing, externalizing, and attention-deficit/hyperactivity disorder), taking 5-10 min to complete. Prior research has established sound psychometric properties of the PSC-17-Y for English speakers. Objective: This study extends psychometric evidence for the acceptability of the PSC-17-Y in a large sample of Spanish adolescents, providing proof of its reliability and structure, convergent and discriminant validity, and longitudinal and gender invariance. Methods: Data were collected on 5430 adolescents, aged 12-18 years, who filled out the PSC-17-Y twice during 2018-2019 (7-month interval). We calculated the Cronbach alpha and the McDonald omega coefficients to test reliability, the Pearson correlation for convergent (distress) and criterion validity (well-being, quality of life, and socioemotional skills), confirmatory factor analysis (CFA) for structure validity, and multigroup and longitudinal measurement invariance analysis for longitudinal and gender stability. Results: Within structural analysis for the PSC-17-Y, CFA supported a correlated 3-factor solution, which was also invariant longitudinally and across gender. All 3 subscales showed evidence of reliability, with coefficients near or above .70. 
Moreover, scores of PSC-17-Y subscales were positively related with convergent measures and negatively related with criterion measures. Normative data for the PSC-17-Y are presented in the form of percentiles (75th and 90th). Conclusions: This work provides the first evidence of the reliability and validity of the Spanish version of the PSC-17-Y administered over the internet to assess mental health problems among adolescents, maintaining the same domains as the long version. ", doi="10.2196/31127", url="https://www.jmir.org/2021/12/e31127", url="http://www.ncbi.nlm.nih.gov/pubmed/34855614" } @Article{info:doi/10.2196/29838, author="Saqib, Kiran and Khan, Fozia Amber and Butt, Ahmad Zahid", title="Machine Learning Methods for Predicting Postpartum Depression: Scoping Review", journal="JMIR Ment Health", year="2021", month="Nov", day="24", volume="8", number="11", pages="e29838", keywords="machine learning", keywords="postpartum depression", keywords="big data", keywords="mobile phone", abstract="Background: Machine learning (ML) offers vigorous statistical and probabilistic techniques that can successfully predict certain clinical conditions using large volumes of data. A review of ML and big data research analytics in maternal depression is pertinent and timely, given the rapid technological developments in recent years. Objective: This study aims to synthesize the literature on ML and big data analytics for maternal mental health, particularly the prediction of postpartum depression (PPD). Methods: We used a scoping review methodology using the Arksey and O'Malley framework to rapidly map research activity in ML for predicting PPD. Two independent researchers searched PsycINFO, PubMed, IEEE Xplore, and the ACM Digital Library in September 2020 to identify relevant publications in the past 12 years. Data were extracted from the articles' ML model, data type, and study results. Results: A total of 14 studies were identified. 
All studies reported the use of supervised learning techniques to predict PPD. Support vector machine and random forest were the most commonly used algorithms in addition to Naive Bayes, regression, artificial neural network, decision trees, and XGBoost (Extreme Gradient Boosting). There was considerable heterogeneity in the best-performing ML algorithm across the selected studies. The area under the receiver operating characteristic curve values reported for different algorithms were support vector machine (range 0.78-0.86), random forest method (0.88), XGBoost (0.80), and logistic regression (0.93). Conclusions: ML algorithms can analyze larger data sets and perform more advanced computations, which can significantly improve the detection of PPD at an early stage. Further clinical research collaborations are required to fine-tune ML algorithms for prediction and treatment. ML might become part of evidence-based practice in addition to clinical knowledge and existing research evidence. ", doi="10.2196/29838", url="https://mental.jmir.org/2021/11/e29838", url="http://www.ncbi.nlm.nih.gov/pubmed/34822337" } @Article{info:doi/10.2196/30249, author="Kim, Euisung and Han, Jieun and Choi, Hojin and Pri{\'e}, Yannick and Vigier, Toinon and Bulteau, Samuel and Kwon, Hyun Gyu", title="Examining the Academic Trends in Neuropsychological Tests for Executive Functions Using Virtual Reality: Systematic Literature Review", journal="JMIR Serious Games", year="2021", month="Nov", day="24", volume="9", number="4", pages="e30249", keywords="virtual reality", keywords="neuropsychological test", keywords="executive function", keywords="cognitive ability", keywords="brain disorder", keywords="immersive", keywords="digital health", keywords="cognition", keywords="academic trends", keywords="neurology", abstract="Background: In neuropsychology, fully immersive virtual reality (VR) has been spotlighted as a promising tool. 
It is considered that VR not only overcomes the existing limitation of neuropsychological tests but is also appropriate for treating executive functions (EFs) within activities of daily living (ADL) due to its high ecological validity. While fully immersive VR offers new possibilities of neuropsychological tests, there are few studies that overview the intellectual landscape and academic trends in the research related to mainly targeted EFs with fully immersive VR. Objective: The objective of this study is to get an overview of the research trends that use VR in neuropsychological tests and to analyze the research trends using fully immersive VR neuropsychological tests with experimental articles. Methods: This review was carried out according to Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) guidelines. Articles were searched in three web databases using keywords related to VR, EFs, and cognitive abilities. The study was conducted in two steps, keyword analysis and in-depth systematic review. In the web database search from 2000 to 2019, 1167 articles were initially collected, of which 234 articles in the eligibility phase were used to conduct keyword analysis and a total of 47 articles were included for systematic review. Results: In keyword analysis, the number of articles focused on dementia including the keywords ``MCI,'' ``SCD,'' and ``dementia'' were highlighted over the period, rather than other symptoms. In addition, we identified that the use of behavioral and physiological data in virtual environments (VEs) has dramatically increased in recent studies. In the systematic review, we focused on the purpose of study, assessment, treatment, and validation of usability and structure. We found that treatment studies and uncategorized studies including presence and cybersickness issues have emerged in the recent period. In addition, the target symptoms and range of participants were diversified. 
Conclusions: There has been a continuously increasing interest in dealing with neuropsychology by using fully immersive VR. Target cognitive abilities have been diversified, as well as target symptoms. Moreover, the concept of embodied cognition was transplanted in this research area. ", doi="10.2196/30249", url="https://games.jmir.org/2021/4/e30249", url="http://www.ncbi.nlm.nih.gov/pubmed/34822341" } @Article{info:doi/10.2196/29749, author="Jan, Zainab and AI-Ansari, Noor and Mousa, Osama and Abd-alrazaq, Alaa and Ahmed, Arfan and Alam, Tanvir and Househ, Mowafa", title="The Role of Machine Learning in Diagnosing Bipolar Disorder: Scoping Review", journal="J Med Internet Res", year="2021", month="Nov", day="19", volume="23", number="11", pages="e29749", keywords="machine learning", keywords="bipolar disorder", keywords="diagnosis", keywords="support vector machine", keywords="clinical data", keywords="mental health", keywords="scoping review", abstract="Background: Bipolar disorder (BD) is the 10th most common cause of frailty in young individuals and has triggered morbidity and mortality worldwide. Patients with BD have a life expectancy 9 to 17 years lower than that of normal people. BD is a predominant mental disorder, but it can be misdiagnosed as depressive disorder, which leads to difficulties in treating affected patients. Approximately 60\% of patients with BD are treated for depression. However, machine learning provides advanced skills and techniques for better diagnosis of BD. Objective: This review aims to explore the machine learning algorithms used for the detection and diagnosis of bipolar disorder and its subtypes. Methods: The study protocol adopted the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) guidelines. We explored 3 databases, namely Google Scholar, ScienceDirect, and PubMed. To enhance the search, we performed backward screening of all the references of the included studies. 
Based on the predefined selection criteria, 2 levels of screening were performed: title and abstract review, and full review of the articles that met the inclusion criteria. Data extraction was performed independently by all investigators. To synthesize the extracted data, a narrative synthesis approach was followed. Results: We retrieved 573 potential articles from the 3 databases. After preprocessing and screening, only 33 articles that met our inclusion criteria were identified. The most commonly used data belonged to the clinical category (19, 58\%). We identified different machine learning models used in the selected studies, including classification models (18, 55\%), regression models (5, 16\%), model-based clustering methods (2, 6\%), natural language processing (1, 3\%), clustering algorithms (1, 3\%), and deep learning--based models (3, 9\%). Magnetic resonance imaging data were most commonly used for classifying bipolar patients compared to other groups (11, 34\%), whereas microarray expression data sets and genomic data were the least commonly used. The maximum ratio of accuracy was 98\%, whereas the minimum accuracy range was 64\%. Conclusions: This scoping review provides an overview of recent studies based on machine learning models used to diagnose patients with BD regardless of their demographics or if they were compared to patients with psychiatric diagnoses. Further research can be conducted to provide clinical decision support in the health industry.
", doi="10.2196/29749", url="https://www.jmir.org/2021/11/e29749", url="http://www.ncbi.nlm.nih.gov/pubmed/34806996" } @Article{info:doi/10.2196/33599, author="Zong, Hui and Hu, Binyang and Han, Yang and Li, Zuofeng and Zhang, Xiaoyan", title="Prevalence and Temporal Trends Analysis of Screening and Diagnostic Instruments in Posttraumatic Stress Disorder: Text Mining Study", journal="JMIR Ment Health", year="2021", month="Nov", day="17", volume="8", number="11", pages="e33599", keywords="posttraumatic stress disorder", keywords="instruments", keywords="prevalence", keywords="clinical trials", keywords="text mining", abstract="Background: Various instruments for patient screening and diagnosis have been developed for and applied in posttraumatic stress disorder (PTSD). Objective: This study comprehensively investigates the prevalence and temporal trends of the most widely used instruments in PTSD-related studies. Methods: A total of 1345 files of registered clinical trials from ClinicalTrials.gov and 9422 abstracts from the PubMed database from 2005 to 2020 were downloaded for this study. The instruments applied in clinical trials were manually annotated, and instruments in abstracts were recognized using exact string matching. The prevalence score of an instrument in a certain period was calculated as the number of studies divided by the number of instances of the instrument. By calculating the yearly prevalence index of each instrument, we conducted a trends analysis and compared the trends in index change between instruments. Results: A total of 4178 instrument synonyms were annotated, which were mapped to 1423 unique instruments. In the 16 years from 2005 to 2020, only 10 instruments were used more than once per year; the 4 most used instruments were the PTSD Checklist, the Clinician-Administered PTSD Disorder Scale, the Patient Health Questionnaire, and the Beck Depression Inventory. 
There were 18 instruments whose yearly prevalence index score exceeded 0.1 at least once during the 16 years. The changes in trends and time points of partial instruments in clinical trials and PubMed abstracts were highly consistent. The average time duration of a PTSD-related trial was 1495.5 days or approximately 4 years from submission to ClinicalTrials.gov to publication in a journal. Conclusions: The application of widely accepted and appropriate instruments can help improve the reliability of research results in PTSD-related clinical studies. With extensive text data obtained from real clinical trials and published articles, we investigated and compared the usage of instruments in the PTSD research community. ", doi="10.2196/33599", url="https://mental.jmir.org/2021/11/e33599", url="http://www.ncbi.nlm.nih.gov/pubmed/34666307" } @Article{info:doi/10.2196/30313, author="Lynch, William and Platt, L. Michael and Pardes, Adam", title="Development of a Severity Score and Comparison With Validated Measures for Depression and Anxiety: Validation Study", journal="JMIR Form Res", year="2021", month="Nov", day="10", volume="5", number="11", pages="e30313", keywords="PHQ-9", keywords="GAD-7", keywords="depression assessment", keywords="anxiety assessment", keywords="measurement-based care", keywords="integrated behavioral health", abstract="Background: Less than 10\% of the individuals seeking behavioral health care receive measurement-based care (MBC). Technology has the potential to implement MBC in a secure and efficient manner. To test this idea, a mobile health (mHealth) platform was developed with the goal of making MBC easier to deliver by clinicians and more accessible to patients within integrated behavioral health care. Data from over 3000 users of the mHealth platform were used to develop an output severity score, a robust screening measure for depression and anxiety.
Objective: The aim of this study is to compare severity scores with scores from validated assessments for depression and anxiety and scores from clinician review to evaluate the potential added value of this new measure. Methods: The severity score uses patient-reported and passively collected data related to behavioral health on an mHealth platform. An artificial intelligence--derived algorithm was developed that condenses behavioral health data into a single, quantifiable measure for longitudinal tracking of an individual's depression and anxiety symptoms. Linear regression and Bland-Altman analyses were used to evaluate the relationships and differences between severity scores and Personal Health Questionnaire-9 (PHQ-9) or Generalized Anxiety Disorder-7 (GAD-7) scores from over 35,000 mHealth platform users. The severity score was also compared with a review by a panel of expert clinicians for a subset of 250 individuals. Results: Linear regression results showed a strong correlation between the severity score and PHQ-9 (r=0.74; P<.001) and GAD-7 (r=0.80; P<.001) changes. A strong positive correlation was also found between the severity score and expert panel clinical review (r=0.80-0.84; P<.001). However, Bland-Altman analysis and the evaluation of outliers on regression analysis showed that the severity score was significantly different from the PHQ-9. Conclusions: Clinicians can reliably use the mHealth severity score as a proxy measure for screening and monitoring behavioral health symptoms longitudinally. The severity score may identify at-risk individuals who are not identified by the PHQ-9. Further research is warranted to evaluate the sensitivity and specificity of the severity score. ", doi="10.2196/30313", url="https://formative.jmir.org/2021/11/e30313", url="http://www.ncbi.nlm.nih.gov/pubmed/34757319" } @Article{info:doi/10.2196/27908, author="Martin-Key, A. 
Nayra and Mirea, Dan-Mircea and Olmert, Tony and Cooper, Jason and Han, Sarah Sung Yeon and Barton-Owen, Giles and Farrag, Lynn and Bell, Emily and Eljasz, Pawel and Cowell, Daniel and Tomasik, Jakub and Bahn, Sabine", title="Toward an Extended Definition of Major Depressive Disorder Symptomatology: Digital Assessment and Cross-validation Study", journal="JMIR Form Res", year="2021", month="Oct", day="28", volume="5", number="10", pages="e27908", keywords="major depressive disorder", keywords="subthreshold depression, transdiagnostic symptoms", keywords="digital assessment", keywords="digital mental health", keywords="mobile phone", abstract="Background: Diagnosing major depressive disorder (MDD) is challenging, with diagnostic manuals failing to capture the wide range of clinical symptoms that are endorsed by individuals with this condition. Objective: This study aims to provide evidence for an extended definition of MDD symptomatology. Methods: Symptom data were collected via a digital assessment developed for a delta study. Random forest classification with nested cross-validation was used to distinguish between individuals with MDD and those with subthreshold symptomatology of the disorder using disorder-specific symptoms and transdiagnostic symptoms. The diagnostic performance of the Patient Health Questionnaire--9 was also examined. Results: A depression-specific model demonstrated good predictive performance when distinguishing between individuals with MDD (n=64) and those with subthreshold depression (n=140) (area under the receiver operating characteristic curve=0.89; sensitivity=82.4\%; specificity=81.3\%; accuracy=81.6\%). 
The inclusion of transdiagnostic symptoms of psychopathology, including symptoms of depression, generalized anxiety disorder, insomnia, emotional instability, and panic disorder, significantly improved the model performance (area under the receiver operating characteristic curve=0.95; sensitivity=86.5\%; specificity=90.8\%; accuracy=89.5\%). The Patient Health Questionnaire--9 was excellent at identifying MDD but overdiagnosed the condition (sensitivity=92.2\%; specificity=54.3\%; accuracy=66.2\%). Conclusions: Our findings are in line with the notion that current diagnostic practices may present an overly narrow conception of mental health. Furthermore, our study provides proof-of-concept support for the clinical utility of a digital assessment to inform clinical decision-making in the evaluation of MDD. ", doi="10.2196/27908", url="https://formative.jmir.org/2021/10/e27908", url="http://www.ncbi.nlm.nih.gov/pubmed/34709182" } @Article{info:doi/10.2196/27706, author="Cilia, Federica and Carette, Romuald and Elbattah, Mahmoud and Dequen, Gilles and Gu{\'e}rin, Jean-Luc and Bosche, J{\'e}r{\^o}me and Vandromme, Luc and Le Driant, Barbara", title="Computer-Aided Screening of Autism Spectrum Disorder: Eye-Tracking Study Using Data Visualization and Deep Learning", journal="JMIR Hum Factors", year="2021", month="Oct", day="25", volume="8", number="4", pages="e27706", keywords="autism spectrum disorder", keywords="screening", keywords="eye tracking", keywords="data visualization", keywords="machine learning", keywords="deep learning", keywords="AI", keywords="ASS", keywords="artificial intelligence", keywords="ML", keywords="adolescent", keywords="diagnosis", abstract="Background: The early diagnosis of autism spectrum disorder (ASD) is highly desirable but remains a challenging task, which requires a set of cognitive tests and hours of clinical examinations. In addition, variations of such symptoms exist, which can make the identification of ASD even more difficult. 
Although diagnosis tests are largely developed by experts, they are still subject to human bias. In this respect, computer-assisted technologies can play a key role in supporting the screening process. Objective: This paper follows on the path of using eye tracking as an integrated part of screening assessment in ASD based on the characteristic elements of the eye gaze. This study adds to the mounting efforts in using eye tracking technology to support the process of ASD screening. Methods: The proposed approach basically aims to integrate eye tracking with visualization and machine learning. A group of 59 school-aged participants took part in the study. The participants were invited to watch a set of age-appropriate photographs and videos related to social cognition. Initially, eye-tracking scanpaths were transformed into a visual representation as a set of images. Subsequently, a convolutional neural network was trained to perform the image classification task. Results: The experimental results demonstrated that the visual representation could simplify the diagnostic task and also attained high accuracy. Specifically, the convolutional neural network model could achieve a promising classification accuracy. This largely suggests that visualizations could successfully encode the information of gaze motion and its underlying dynamics. Further, we explored possible correlations between the autism severity and the dynamics of eye movement based on the maximal information coefficient. The findings primarily show that the combination of eye tracking, visualization, and machine learning have strong potential in developing an objective tool to assist in the screening of ASD. Conclusions: Broadly speaking, the approach we propose could be transferable to screening for other disorders, particularly neurodevelopmental disorders.
", doi="10.2196/27706", url="https://humanfactors.jmir.org/2021/4/e27706", url="http://www.ncbi.nlm.nih.gov/pubmed/34694238" } @Article{info:doi/10.2196/31862, author="Popescu, Christina and Golden, Grace and Benrimoh, David and Tanguay-Sela, Myriam and Slowey, Dominique and Lundrigan, Eryn and Williams, J{\'e}r{\^o}me and Desormeau, Bennet and Kardani, Divyesh and Perez, Tamara and Rollins, Colleen and Israel, Sonia and Perlman, Kelly and Armstrong, Caitrin and Baxter, Jacob and Whitmore, Kate and Fradette, Marie-Jeanne and Felcarek-Hope, Kaelan and Soufi, Ghassen and Fratila, Robert and Mehltretter, Joseph and Looper, Karl and Steiner, Warren and Rej, Soham and Karp, F. Jordan and Heller, Katherine and Parikh, V. Sagar and McGuire-Snieckus, Rebecca and Ferrari, Manuela and Margolese, Howard and Turecki, Gustavo", title="Evaluating the Clinical Feasibility of an Artificial Intelligence--Powered, Web-Based Clinical Decision Support System for the Treatment of Depression in Adults: Longitudinal Feasibility Study", journal="JMIR Form Res", year="2021", month="Oct", day="25", volume="5", number="10", pages="e31862", keywords="clinical decision support system", keywords="major depressive disorder", keywords="artificial intelligence", keywords="feasibility", keywords="usability", keywords="mobile phone", abstract="Background: Approximately two-thirds of patients with major depressive disorder do not achieve remission during their first treatment. There has been increasing interest in the use of digital, artificial intelligence--powered clinical decision support systems (CDSSs) to assist physicians in their treatment selection and management, improving the personalization and use of best practices such as measurement-based care. Previous literature shows that for digital mental health tools to be successful, the tool must be easy for patients and physicians to use and feasible within existing clinical workflows. 
Objective: This study aims to examine the feasibility of an artificial intelligence--powered CDSS, which combines the operationalized 2016 Canadian Network for Mood and Anxiety Treatments guidelines with a neural network--based individualized treatment remission prediction. Methods: Owing to the COVID-19 pandemic, the study was adapted to be completed entirely remotely. A total of 7 physicians recruited outpatients diagnosed with major depressive disorder according to the Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition criteria. Patients completed a minimum of one visit without the CDSS (baseline) and 2 subsequent visits where the CDSS was used by the physician (visits 1 and 2). The primary outcome of interest was change in appointment length after the introduction of the CDSS as a proxy for feasibility. Feasibility and acceptability data were collected through self-report questionnaires and semistructured interviews. Results: Data were collected between January and November 2020. A total of 17 patients were enrolled in the study; of the 17 patients, 14 (82\%) completed the study. There was no significant difference in appointment length between visits (introduction of the tool did not increase appointment length; F$_{2,24}$=0.805; mean squared error 58.08; P=.46). In total, 92\% (12/13) of patients and 71\% (5/7) of physicians felt that the tool was easy to use; 62\% (8/13) of patients and 71\% (5/7) of physicians rated that they trusted the CDSS. Of the 13 patients, 6 (46\%) felt that the patient-clinician relationship significantly or somewhat improved, whereas 7 (54\%) felt that it did not change. Conclusions: Our findings confirm that the integration of the tool does not significantly increase appointment length and suggest that the CDSS is easy to use and may have positive effects on the patient-physician relationship for some patients. The CDSS is feasible and ready for effectiveness studies.
Trial Registration: ClinicalTrials.gov NCT04061642; http://clinicaltrials.gov/ct2/show/NCT04061642 ", doi="10.2196/31862", url="https://formative.jmir.org/2021/10/e31862", url="http://www.ncbi.nlm.nih.gov/pubmed/34694234" } @Article{info:doi/10.2196/26821, author="Poon, J. Lok Y. and Tsang, H. Hector W. and Chan, J. Tsan Y. and Man, T. Sze W. and Ng, Y. Lok and Wong, E. Yi L. and Lin, Chung-Ying and Chien, Chi-Wen and Griffiths, D. Mark and Pontes, M. Halley and Pakpour, H. Amir", title="Psychometric Properties of the Internet Gaming Disorder Scale--Short-Form (IGDS9-SF): Systematic Review", journal="J Med Internet Res", year="2021", month="Oct", day="18", volume="23", number="10", pages="e26821", keywords="psychometrics", keywords="IGDS9-SF", keywords="gaming addiction", keywords="gaming disorder", keywords="problematic gaming", keywords="internet", keywords="gaming", keywords="internet gaming", abstract="Background: The Internet Gaming Disorder Scale--Short-Form (IGDS9-SF) is among the best with regard to its psychometric properties. Therefore, clinical psychologists are likely guided to use the IGDS9-SF if they want to assess or screen the disordered gaming in their practice. However, the information, especially psychometric evidence, concerning the IGDS9-SF has not been fully examined and summarized. Objective: This systematic review evaluated the psychometric properties of different language versions of the IGDS9-SF and assessed its methodological quality in order to improve the clinicians' understanding of the IGDS9-SF and facilitate its use. Methods: Systematic literature searches were carried out using Embase, MEDLINE, PsycINFO, PubMed, ScienceDirect, Scopus, and Web of Science. 
The review included English-language studies of any research design that have reported at least one psychometric property of the IGDS9-SF, as defined by the COnsensus-based Standards for the selection of health status Measurement INstrument (COSMIN), and have aimed at testing the psychometric properties of the IGDS9-SF. Results: In total, 21 studies comprising 15 language versions of the IGDS9-SF were included. Overall, the IGDS9-SF showed adequate internal consistency (although some items did not have satisfactory item-total correlation [IT]), excellent criterion validity, and the ability to distinguish different subgroups with measurement invariance being supported across gender and age. In terms of factor structure, the IGDS9-SF was shown to have a unidimensional factor structure across all 21 studies. Conclusions: Although there is insufficient evidence regarding the responsiveness and properties of the IGDS9-SF using item response theory, the existing evidence supports its use in assessing disordered gaming among individuals. ", doi="10.2196/26821", url="https://www.jmir.org/2021/10/e26821", url="http://www.ncbi.nlm.nih.gov/pubmed/34661543" } @Article{info:doi/10.2196/24560, author="Mouchabac, Stephane and Leray, Philippe and Adrien, Vladimir and Gollier-Briant, Fanny and Bonnot, Olivier", title="Prevention of Suicidal Relapses in Adolescents With a Smartphone Application: Bayesian Network Analysis of a Preclinical Trial Using In Silico Patient Simulations", journal="J Med Internet Res", year="2021", month="Sep", day="30", volume="23", number="9", pages="e24560", keywords="suicide", keywords="bayesian network", keywords="smartphone application", keywords="digital psychiatry", keywords="artificial intelligence", abstract="Background: Recently, artificial intelligence technologies and machine learning methods have offered attractive prospects to design and manage crisis response processes, especially in suicide crisis management. 
In other domains, most algorithms are based on big data to help diagnose and suggest rational treatment options in medicine. But data in psychiatry are related to behavior and clinical evaluation. They are more heterogeneous, less objective, and incomplete compared to other fields of medicine. Consequently, the use of psychiatric clinical data may lead to less accurate and sometimes impossible-to-build algorithms and provide inefficient digital tools. In this case, the Bayesian network (BN) might be helpful and accurate when constructed from expert knowledge. Medical Companion is a government-funded smartphone application based on repeated questions posed to the subject and algorithm-matched advice to prevent relapse of suicide attempts within several months. Objective: Our paper aims to present our development of a BN algorithm as a medical device in accordance with the American Psychiatric Association digital healthcare guidelines and to provide results from a preclinical phase. Methods: The experts are psychiatrists working in university hospitals who are experienced and trained in managing suicidal crises. As recommended when building a BN, we divided the process into 2 tasks. Task 1 is structure determination, representing the qualitative part of the BN. The factors were chosen for their known and demonstrated link with suicidal risk in the literature (clinical, behavioral, and psychometrics) and therapeutic accuracy (advice). Task 2 is parameter elicitation, with the conditional probabilities corresponding to the quantitative part. The 4-step simulation (use case) process allowed us to ensure that the advice was adapted to the clinical states of patients and the context. Results: For task 1, in this formative part, we defined clinical questions related to the mental state of the patients, and we proposed specific factors related to the questions. Subsequently, we suggested specific advice related to the patient's state. 
We obtained a structure for the BN with a graphical representation of causal relations between variables. For task 2, several runs of simulations confirmed the a priori model of experts regarding mental state, refining the precision of our model. Moreover, we noticed that the advice had the same distribution as the previous state and was clinically relevant. After 2 rounds of simulation, the experts found the exact match. Conclusions: BN is an efficient methodology to build an algorithm for a digital assistant dedicated to suicidal crisis management. Digital psychiatry is an emerging field, but it needs validation and testing before being used with patients. Similar to psychotropics, any medical device requires a phase II (preclinical) trial. With this method, we propose another step to respond to the American Psychiatric Association guidelines. Trial Registration: ClinicalTrials.gov NCT03975881; https://clinicaltrials.gov/ct2/show/NCT03975881 ", doi="10.2196/24560", url="https://www.jmir.org/2021/9/e24560", url="http://www.ncbi.nlm.nih.gov/pubmed/34591030" } @Article{info:doi/10.2196/24352, author="Flanagan, Olivia and Chan, Amy and Roop, Partha and Sundram, Frederick", title="Using Acoustic Speech Patterns From Smartphones to Investigate Mood Disorders: Scoping Review", journal="JMIR Mhealth Uhealth", year="2021", month="Sep", day="17", volume="9", number="9", pages="e24352", keywords="smartphone", keywords="data science", keywords="speech patterns", keywords="mood disorders", keywords="diagnosis", keywords="monitoring", abstract="Background: Mood disorders are commonly underrecognized and undertreated, as diagnosis is reliant on self-reporting and clinical assessments that are often not timely. Speech characteristics of those with mood disorders differs from healthy individuals. 
With the wide use of smartphones, and the emergence of machine learning approaches, smartphones can be used to monitor speech patterns to help the diagnosis and monitoring of mood disorders. Objective: The aim of this review is to synthesize research on using speech patterns from smartphones to diagnose and monitor mood disorders. Methods: Literature searches of major databases, Medline, PsycInfo, EMBASE, and CINAHL, initially identified 832 relevant articles using the search terms ``mood disorders'', ``smartphone'', ``voice analysis'', and their variants. Only 13 studies met inclusion criteria: use of a smartphone for capturing voice data, focus on diagnosing or monitoring a mood disorder(s), clinical populations recruited prospectively, and in the English language only. Articles were assessed by 2 reviewers, and data extracted included data type, classifiers used, methods of capture, and study results. Studies were analyzed using a narrative synthesis approach. Results: Studies showed that voice data alone had reasonable accuracy in predicting mood states and mood fluctuations based on objectively monitored speech patterns. While a fusion of different sensor modalities revealed the highest accuracy (97.4\%), nearly 80\% of included studies were pilot trials or feasibility studies without control groups and had small sample sizes ranging from 1 to 73 participants. Studies were also carried out over short or varying timeframes and had significant heterogeneity of methods in terms of the types of audio data captured, environmental contexts, classifiers, and measures to control for privacy and ambient noise. Conclusions: Approaches that allow smartphone-based monitoring of speech patterns in mood disorders are rapidly growing. The current body of evidence supports the value of speech patterns to monitor, classify, and predict mood states in real time. 
However, many challenges remain around the robustness, cost-effectiveness, and acceptability of such an approach and further work is required to build on current research and reduce heterogeneity of methodologies as well as clinical evaluation of the benefits and risks of such approaches. ", doi="10.2196/24352", url="https://mhealth.jmir.org/2021/9/e24352", url="http://www.ncbi.nlm.nih.gov/pubmed/34533465" } @Article{info:doi/10.2196/25660, author="Rubin, H. Leah and Severson, Joan and Marcotte, D. Thomas and Savin, J. Micah and Best, Allen and Johnson, Shane and Cosman, Joshua and Merickel, Michael and Buchholz, Alison and Del Bene, A. Victor and Eldred, Lois and Sacktor, C. Ned and Fuchs, Joelle-Beverlie and Althoff, N. Keri and Moore, D. Richard", title="Tablet-Based Cognitive Impairment Screening for Adults With HIV Seeking Clinical Care: Observational Study", journal="JMIR Ment Health", year="2021", month="Sep", day="9", volume="8", number="9", pages="e25660", keywords="cognitive complications", keywords="people with HIV", keywords="digital assessment", keywords="HIV", keywords="tablet", keywords="screening", abstract="Background: Neurological complications including cognitive impairment persist among people with HIV on antiretrovirals; however, cognitive screening is not routinely conducted in HIV clinics. Objective: Our objective for this study was 3-fold: (1) to determine the feasibility of implementing an iPad-based cognitive impairment screener among adults seeking HIV care, (2) to examine the psychometric properties of the tool, and (3) to examine predictors of cognitive impairment using the tool. Methods: A convenience sample of participants completed Brain Baseline Assessment of Cognition and Everyday Functioning (BRACE), which included (1) Trail Making Test Part A, measuring psychomotor speed; (2) Trail Making Test Part B, measuring set-shifting; (3) Stroop Color, measuring processing speed; and (4) the Visual--Spatial Learning Test. 
Global neuropsychological function was estimated as mean T score performance on the 4 outcomes. Impairment on each test or for the global mean was defined as a T score $\leq$40. Subgroups of participants repeated the tests 4 weeks or >6 months after completing the first test to evaluate intraperson test--retest reliability and practice effects (improvements in performance due to repeated test exposure). An additional subgroup completed a lengthier cognitive battery concurrently to assess validity. Relevant factors were abstracted from electronic medical records to examine predictors of global neuropsychological function. Results: The study population consisted of 404 people with HIV (age: mean 53.6 years; race: 332/404, 82\% Black; 34/404, 8\% White, 10/404, 2\% American Indian/Alaskan Native; 28/404, 7\% other and 230/404, 58\% male; 174/404, 42\% female) of whom 99\% (402/404) were on antiretroviral therapy. Participants completed BRACE in a mean of 12 minutes (SD 3.2), and impairment was demonstrated by 34\% (136/404) on Trail Making Test A, 44\% (177/404) on Trail Making Test B, 40\% (161/404) on Stroop Color, and 17\% (67/404) on Visual-Spatial Learning Test. Global impairment was demonstrated by 103 out of 404 (25\%). Test--retest reliability for the subset of participants (n=26) repeating the measure at 4 weeks was 0.81 and for the subset of participants (n=67) repeating the measure almost 1 year later (days: median 294, IQR 50) was 0.63. There were no significant practice effects at either time point (P=.20 and P=.68, respectively). With respect for validity, the correlation between global impairment on the lengthier cognitive battery and BRACE was 0.63 (n=61; P<.001), with 84\% sensitivity and 94\% specificity to impairment on the lengthier cognitive battery. Conclusions: We were able to successfully implement BRACE and estimate cognitive impairment burden in the context of routine clinic care. BRACE was also shown to have good psychometric properties.
This easy-to-use tool in clinical settings may facilitate the care needs of people with HIV as cognitive impairment continues to remain a concern in people with HIV. ", doi="10.2196/25660", url="https://mental.jmir.org/2021/9/e25660", url="http://www.ncbi.nlm.nih.gov/pubmed/34499048" } @Article{info:doi/10.2196/29328, author="Zhao, Zhong and Tang, Haiming and Zhang, Xiaobin and Qu, Xingda and Hu, Xinyao and Lu, Jianping", title="Classification of Children With Autism and Typical Development Using Eye-Tracking Data From Face-to-Face Conversations: Machine Learning Model Development and Performance Evaluation", journal="J Med Internet Res", year="2021", month="Aug", day="26", volume="23", number="8", pages="e29328", keywords="autism spectrum disorder", keywords="eye tracking", keywords="face-to-face interaction", keywords="machine learning", keywords="visual fixation", abstract="Background: Previous studies have shown promising results in identifying individuals with autism spectrum disorder (ASD) by applying machine learning (ML) to eye-tracking data collected while participants viewed varying images (ie, pictures, videos, and web pages). Although gaze behavior is known to differ between face-to-face interaction and image-viewing tasks, no study has investigated whether eye-tracking data from face-to-face conversations can also accurately identify individuals with ASD. Objective: The objective of this study was to examine whether eye-tracking data from face-to-face conversations could classify children with ASD and typical development (TD). We further investigated whether combining features on visual fixation and length of conversation would achieve better classification performance. Methods: Eye tracking was performed on children with ASD and TD while they were engaged in face-to-face conversations (including 4 conversational sessions) with an interviewer. 
By implementing forward feature selection, four ML classifiers were used to determine the maximum classification accuracy and the corresponding features: support vector machine (SVM), linear discriminant analysis, decision tree, and random forest. Results: A maximum classification accuracy of 92.31\% was achieved with the SVM classifier by combining features on both visual fixation and session length. The classification accuracy of combined features was higher than that obtained using visual fixation features (maximum classification accuracy 84.62\%) or session length (maximum classification accuracy 84.62\%) alone. Conclusions: Eye-tracking data from face-to-face conversations could accurately classify children with ASD and TD, suggesting that ASD might be objectively screened in everyday social interactions. However, these results will need to be validated with a larger sample of individuals with ASD (varying in severity and balanced sex ratio) using data collected from different modalities (eg, eye tracking, kinematic, electroencephalogram, and neuroimaging). In addition, individuals with other clinical conditions (eg, developmental delay and attention deficit hyperactivity disorder) should be included in similar ML studies for detecting ASD. ", doi="10.2196/29328", url="https://www.jmir.org/2021/8/e29328", url="http://www.ncbi.nlm.nih.gov/pubmed/34435957" } @Article{info:doi/10.2196/28918, author="Di Matteo, Daniel and Fotinos, Kathryn and Lokuge, Sachinthya and Mason, Geneva and Sternat, Tia and Katzman, A. 
Martin and Rose, Jonathan", title="Automated Screening for Social Anxiety, Generalized Anxiety, and Depression From Objective Smartphone-Collected Data: Cross-sectional Study", journal="J Med Internet Res", year="2021", month="Aug", day="13", volume="23", number="8", pages="e28918", keywords="mobile sensing", keywords="passive EMA", keywords="passive sensing", keywords="psychiatric assessment", keywords="mood and anxiety disorders", keywords="mobile apps", keywords="mhealth", keywords="mobile phone", keywords="digital health", keywords="digital phenotyping", abstract="Background: The lack of access to mental health care could be addressed, in part, through the development of automated screening technologies for detecting the most common mental health disorders without the direct involvement of clinicians. Objective smartphone-collected data may contain sufficient information about individuals' behaviors to infer their mental states and therefore screen for anxiety disorders and depression. Objective: The objective of this study is to compare how a single set of recognized and novel features, extracted from smartphone-collected data, can be used for predicting generalized anxiety disorder (GAD), social anxiety disorder (SAD), and depression. Methods: An Android app was designed, together with a centralized server system, to collect periodic measurements of objective smartphone data. The types of data included samples of ambient audio, GPS location, screen state, and light sensor data. Subjects were recruited into a 2-week observational study in which the app was run on their personal smartphones. The subjects also completed self-report severity measures of SAD, GAD, and depression. The participants were 112 Canadian adults from a nonclinical population. High-level features were extracted from the data of 84 participants, and predictive models of SAD, GAD, and depression were built and evaluated. 
Results: Models of SAD and depression achieved a significantly greater screening accuracy than uninformative models (area under the receiver operating characteristic means of 0.64, SD 0.13 and 0.72, SD 0.12, respectively), whereas models of GAD failed to be predictive. Investigation of the model coefficients revealed key features that were predictive of SAD and depression. Conclusions: We demonstrate the ability of a common set of features to act as predictors in the models of both SAD and depression. This suggests that the types of behaviors that can be inferred from smartphone-collected data are broad indicators of mental health, which can be used to study, assess, and track psychopathology simultaneously across multiple disorders and diagnostic boundaries. ", doi="10.2196/28918", url="https://www.jmir.org/2021/8/e28918", url="http://www.ncbi.nlm.nih.gov/pubmed/34397386" } @Article{info:doi/10.2196/29500, author="Manabe, Masae and Liew, Kongmeng and Yada, Shuntaro and Wakamiya, Shoko and Aramaki, Eiji", title="Estimation of Psychological Distress in Japanese Youth Through Narrative Writing: Text-Based Stylometric and Sentiment Analyses", journal="JMIR Form Res", year="2021", month="Aug", day="12", volume="5", number="8", pages="e29500", keywords="psychological distress", keywords="youth", keywords="narratives", keywords="natural language processing", keywords="Japan", keywords="mental health", keywords="stress", keywords="distress", keywords="young adult", keywords="teenager", keywords="sentiment", abstract="Background: Internalizing mental illnesses associated with psychological distress are often underdetected. Text-based detection using natural language processing (NLP) methods is increasingly being used to complement conventional detection efforts. However, these approaches often rely on self-disclosure through autobiographical narratives that may not always be possible, especially in the context of the collectivistic Japanese culture. 
Objective: We propose the use of narrative writing as an alternative resource for mental illness detection in youth. Accordingly, in this study, we investigated the textual characteristics of narratives written by youth with psychological distress; our research focuses on the detection of psychopathological tendencies in written imaginative narratives. Methods: Using NLP tools such as stylometric measures and lexicon-based sentiment analysis, we examined short narratives from 52 Japanese youth (mean age 19.8 years, SD 3.1) obtained through crowdsourcing. Participants wrote a short narrative introduction to an imagined story before completing a questionnaire to quantify their tendencies toward psychological distress. Based on this score, participants were categorized into higher distress and lower distress groups. The written narratives were then analyzed using NLP tools and examined for between-group differences. Although outside the scope of this study, we also carried out a supplementary analysis of narratives written by adults using the same procedure. Results: Youth demonstrating higher tendencies toward psychological distress used significantly more positive (happiness-related) words, revealing differences in valence of the narrative content. No other significant differences were observed between the high and low distress groups. Conclusions: Youth with tendencies toward mental illness were found to write more positive stories that contained more happiness-related terms. These results may potentially have widespread implications on psychological distress screening on online platforms, particularly in cultures such as Japan that are not accustomed to self-disclosure. Although the mechanisms that we propose in explaining our results are speculative, we believe that this interpretation paves the way for future research in online surveillance and detection efforts. 
", doi="10.2196/29500", url="https://formative.jmir.org/2021/8/e29500", url="http://www.ncbi.nlm.nih.gov/pubmed/34387556" } @Article{info:doi/10.2196/29368, author="Schueller, M. Stephen and Neary, Martha and Lai, Jocelyn and Epstein, A. Daniel", title="Understanding People's Use of and Perspectives on Mood-Tracking Apps: Interview Study", journal="JMIR Ment Health", year="2021", month="Aug", day="11", volume="8", number="8", pages="e29368", keywords="mental health", keywords="mobile apps", keywords="mHealth", keywords="emotions", keywords="affect", keywords="self-tracking", abstract="Background: Supporting mental health and wellness is of increasing interest due to a growing recognition of the prevalence and burden of mental health issues. Mood is a central aspect of mental health, and several technologies, especially mobile apps, have helped people track and understand it. However, despite formative work on and dissemination of mood-tracking apps, it is not well understood how mood-tracking apps used in real-world contexts might benefit people and what people hope to gain from them. Objective: To address this gap, the purpose of this study was to understand motivations for and experiences in using mood-tracking apps from people who used them in real-world contexts. Methods: We interviewed 22 participants who had used mood-tracking apps using a semistructured interview and card sorting task. The interview focused on their experiences using a mood-tracking app. We then conducted a card sorting task using screenshots of various data entry and data review features from mood-tracking apps. We used thematic analysis to identify themes around why people use mood-tracking apps, what they found useful about them, and where people felt these apps fell short. Results: Users of mood-tracking apps were primarily motivated by negative life events or shifts in their own mental health that prompted them to engage in tracking and improve their situation. 
In general, participants felt that using a mood-tracking app facilitated self-awareness and helped them to look back on a previous emotion or mood experience to understand what was happening. Interestingly, some users reported less inclination to document their negative mood states and preferred to document their positive moods. There was a range of preferences for personalization and simplicity of tracking. Overall, users also liked features in which their previous tracked emotions and moods were visualized in figures or calendar form to understand trends. One gap in available mood-tracking apps was the lack of app-facilitated recommendations or suggestions for how to interpret their own data or improve their mood. Conclusions: Although people find various features of mood-tracking apps helpful, the way people use mood-tracking apps, such as avoiding entering negative moods, tracking infrequently, or wanting support to understand or change their moods, demonstrate opportunities for improvement. Understanding why and how people are using current technologies can provide insights to guide future designs and implementations. ", doi="10.2196/29368", url="https://mental.jmir.org/2021/8/e29368", url="http://www.ncbi.nlm.nih.gov/pubmed/34383678" } @Article{info:doi/10.2196/26348, author="An{\'y}{\v z}, Ji{\v r}{\'i} and Bak{\v s}tein, Eduard and Dally, Andrea and Koleni{\v c}, Mari{\'a}n and Hlinka, Jaroslav and Hartmannov{\'a}, Tereza and Urbanov{\'a}, Kate{\v r}ina and Correll, U.
Christoph and Nov{\'a}k, Daniel and {\v S}paniel, Filip", title="Validity of the Aktibipo Self-rating Questionnaire for the Digital Self-assessment of Mood and Relapse Detection in Patients With Bipolar Disorder: Instrument Validation Study", journal="JMIR Ment Health", year="2021", month="Aug", day="9", volume="8", number="8", pages="e26348", keywords="bipolar disorder", keywords="symptom monitoring", keywords="ecological mood assessment", keywords="relapse detection", keywords="mobile application", keywords="mobile phone", abstract="Background: Self-reported mood is a valuable clinical data source regarding disease state and course in patients with mood disorders. However, validated, quick, and scalable digital self-report measures that can also detect relapse are still not available for clinical care. Objective: In this study, we aim to validate the newly developed ASERT (Aktibipo Self-rating) questionnaire---a 10-item, mobile app--based, self-report mood questionnaire consisting of 4 depression, 4 mania, and 2 nonspecific symptom items, each with 5 possible answers. The validation data set is a subset of the ongoing observational longitudinal AKTIBIPO400 study for the long-term monitoring of mood and activity (via actigraphy) in patients with bipolar disorder (BD). Patients with confirmed BD are included and monitored with weekly ASERT questionnaires and monthly clinical scales (Montgomery-{\AA}sberg Depression Rating Scale [MADRS] and Young Mania Rating Scale [YMRS]). Methods: The content validity of the ASERT questionnaire was assessed using principal component analysis, and the Cronbach $\alpha$ was used to assess the internal consistency of each factor. The convergent validity of the depressive or manic items of the ASERT questionnaire with the MADRS and YMRS, respectively, was assessed using a linear mixed-effects model and linear correlation analyses.
In addition, we investigated the capability of the ASERT questionnaire to distinguish relapse (YMRS$\geq$15 and MADRS$\geq$15) from a nonrelapse (interepisode) state (YMRS<15 and MADRS<15) using a logistic mixed-effects model. Results: A total of 99 patients with BD were included in this study (follow-up: mean 754 days, SD 266) and completed an average of 78.1\% (SD 18.3\%) of the requested ASERT assessments (completion time for the 10 ASERT questions: median 24.0 seconds) across all patients in this study. The ASERT depression items were highly associated with MADRS total scores (P<.001; bootstrap). Similarly, ASERT mania items were highly associated with YMRS total scores (P<.001; bootstrap). Furthermore, the logistic mixed-effects regression model for scale-based relapse detection showed high detection accuracy in a repeated holdout validation for both depression (accuracy=85\%; sensitivity=69.9\%; specificity=88.4\%; area under the receiver operating characteristic curve=0.880) and mania (accuracy=87.5\%; sensitivity=64.9\%; specificity=89.9\%; area under the receiver operating characteristic curve=0.844). Conclusions: The ASERT questionnaire is a quick and acceptable mood monitoring tool that is administered via a smartphone app. The questionnaire has a good capability to detect the worsening of clinical symptoms in a long-term monitoring scenario. ", doi="10.2196/26348", url="https://mental.jmir.org/2021/8/e26348", url="http://www.ncbi.nlm.nih.gov/pubmed/34383689" } @Article{info:doi/10.2196/26449, author="Wiley, Katelyn and Robinson, Raquel and Mandryk, L. 
Regan", title="The Making and Evaluation of Digital Games Used for the Assessment of Attention: Systematic Review", journal="JMIR Serious Games", year="2021", month="Aug", day="9", volume="9", number="3", pages="e26449", keywords="cognitive assessment", keywords="attention", keywords="serious games", keywords="gamification", keywords="systematic review", keywords="mobile phone", abstract="Background: Serious games are now widely used in many contexts, including psychological research and clinical use. One area of growing interest is that of cognitive assessment, which seeks to measure different cognitive functions such as memory, attention, and perception. Measuring these functions at both the population and individual levels can inform research and indicate health issues. Attention is an important function to assess, as an accurate measure of attention can help diagnose many common disorders, such as attention-deficit/hyperactivity disorder and dementia. However, using games to assess attention poses unique problems, as games inherently manipulate attention through elements such as sound effects, graphics, and rewards, and research on adding game elements to assessments (ie, gamification) has shown mixed results. The process for developing cognitive tasks is robust, with high psychometric standards that must be met before these tasks are used for assessment. Although games offer more diverse approaches for assessment, there is no standard for how they should be developed or evaluated. Objective: To better understand the field and provide guidance to interdisciplinary researchers, we aim to answer the question: How are digital games used for the cognitive assessment of attention made and measured? Methods: We searched several databases for papers that described a digital game used to assess attention that could be deployed remotely without specialized hardware. 
We used Rayyan, a systematic review software, to screen the records before conducting a systematic review. Results: The initial database search returned 49,365 papers. Our screening process resulted in a total of 74 papers that used a digital game to measure cognitive functions related to attention. Across the studies in our review, we found three approaches to making assessment games: gamifying cognitive tasks, creating custom games based on theories of cognition, and exploring potential assessment properties of commercial games. With regard to measuring the assessment properties of these games (eg, how accurately they assess attention), we found three approaches: comparison to a traditional cognitive task, comparison to a clinical diagnosis, and comparison to knowledge of cognition; however, most studies in our review did not evaluate the game's properties (eg, if participants enjoyed the game). Conclusions: Our review provides an overview of how games used for the assessment of attention are developed and evaluated. We further identified three barriers to advancing the field: reliance on assumptions, lack of evaluation, and lack of integration and standardization. We then recommend the best practices to address these barriers. Our review can act as a resource to help guide the field toward more standardized approaches and rigorous evaluation required for the widespread adoption of assessment games. 
", doi="10.2196/26449", url="https://games.jmir.org/2021/3/e26449", url="http://www.ncbi.nlm.nih.gov/pubmed/34383674" } @Article{info:doi/10.2196/17971, author="Oxholm, Christina and Christensen, Soendergaard Anne-Marie and Christiansen, Regina and Wiil, Kock Uffe and Nielsen, S{\o}gaard Anette", title="Attitudes of Patients and Health Professionals Regarding Screening Algorithms: Qualitative Study", journal="JMIR Form Res", year="2021", month="Aug", day="9", volume="5", number="8", pages="e17971", keywords="screening", keywords="algorithms", keywords="alcohol", keywords="qualitative study", keywords="attitudes", keywords="opinions", keywords="patients", keywords="health professionals", abstract="Background: As a preamble to an attempt to develop a tool that can aid health professionals at hospitals in identifying whether the patient may have an alcohol abuse problem, this study investigates opinions and attitudes among both health professionals and patients about using patient data from electronic health records (EHRs) in an algorithm screening for alcohol problems. Objective: The aim of this study was to investigate the attitudes and opinions of patients and health professionals at hospitals regarding the use of previously collected data in developing and implementing an algorithmic helping tool in EHR for screening inexpedient alcohol habits; in addition, the study aims to analyze how patients would feel about asking and being asked about alcohol by staff, based on a notification in the EHR from such a tool. Methods: Using semistructured interviews, we interviewed 9 health professionals and 5 patients to explore their opinions and attitudes about an algorithm-based helping tool and about asking and being asked about alcohol usage when being given a reminder from this type of tool. The data were analyzed using an ad hoc method consistent with a close reading and meaning condensing. 
Results: The health professionals were both positive and negative about a helping tool grounded in algorithms. They were optimistic about the potential of such a tool to save some time by providing a quick overview if it was easy to use but, on the negative side, noted that this type of helping tool might take away the professionals' instinct. The patients were overall positive about the helping tool, stating that they would find this tool beneficial for preventive care. Some of the patients expressed concerns that the information provided by the tool could be misused. Conclusions: When developing and implementing an algorithmic helping tool, the following aspects should be considered: (1) making the helping tool as transparent in its recommendations as possible, avoiding black boxing, and ensuring room for professional discretion in clinical decision making; and (2) including and taking into account the attitudes and opinions of patients and health professionals in the design and development process of such an algorithmic helping tool. ", doi="10.2196/17971", url="https://formative.jmir.org/2021/8/e17971", url="http://www.ncbi.nlm.nih.gov/pubmed/34383666" } @Article{info:doi/10.2196/19824, author="Wongkoblap, Akkapon and Vadillo, A. Miguel and Curcin, Vasa", title="Deep Learning With Anaphora Resolution for the Detection of Tweeters With Depression: Algorithm Development and Validation Study", journal="JMIR Ment Health", year="2021", month="Aug", day="6", volume="8", number="8", pages="e19824", keywords="depression", keywords="mental health", keywords="Twitter", keywords="social media", keywords="deep learning", keywords="anaphora resolution", keywords="multiple-instance learning", keywords="depression markers", abstract="Background: Mental health problems are widely recognized as a major public health challenge worldwide. This concern highlights the need to develop effective tools for detecting mental health disorders in the population. 
Social networks are a promising source of data wherein patients publish rich personal information that can be mined to extract valuable psychological cues; however, these data come with their own set of challenges, such as the need to disambiguate between statements about oneself and third parties. Traditionally, natural language processing techniques for social media have looked at text classifiers and user classification models separately, hence presenting a challenge for researchers who want to combine text sentiment and user sentiment analysis. Objective: The objective of this study is to develop a predictive model that can detect users with depression from Twitter posts and instantly identify textual content associated with mental health topics. The model can also address the problem of anaphoric resolution and highlight anaphoric interpretations. Methods: We retrieved the data set from Twitter by using a regular expression or stream of real-time tweets comprising 3682 users, of which 1983 self-declared their depression and 1699 declared no depression. Two multiple instance learning models were developed---one with and one without an anaphoric resolution encoder---to identify users with depression and highlight posts related to the mental health of the author. Several previously published models were applied to our data set, and their performance was compared with that of our models. Results: The maximum accuracy, F1 score, and area under the curve of our anaphoric resolution model were 92\%, 92\%, and 90\%, respectively. The model outperformed alternative predictive models, which ranged from classical machine learning models to deep learning models. Conclusions: Our model with anaphoric resolution shows promising results when compared with other predictive models and provides valuable insights into textual content that is relevant to the mental health of the tweeter. 
", doi="10.2196/19824", url="https://mental.jmir.org/2021/8/e19824", url="http://www.ncbi.nlm.nih.gov/pubmed/34383688" } @Article{info:doi/10.2196/29021, author="Niculescu, Iulia and Quirt, Hannah and Arora, Twinkle and Borsook, Terry and Green, Robin and Ford, Brett and Iaboni, Andrea", title="Ecological Momentary Assessment of Depression in People With Advanced Dementia: Longitudinal Pilot Study", journal="JMIR Aging", year="2021", month="Aug", day="4", volume="4", number="3", pages="e29021", keywords="dementia", keywords="depression", keywords="ecological momentary assessment", keywords="tool performance", abstract="Background: Barriers to assessing depression in advanced dementia include the presence of informant and patient recall biases. Ecological momentary assessment provides an improved approach for mood assessment by collecting observations in intervals throughout the day, decreasing recall bias, and increasing ecological validity. Objective: This study aims to evaluate the feasibility, reliability, and validity of the modified 4-item Cornell Scale for Depression in Dementia for Momentary Assessment (mCSDD4-MA) tool to assess depression in patients with advanced dementia. Methods: A intensive longitudinal pilot study design was used. A total of 12 participants with advanced dementia were enrolled from an inpatient psychogeriatric unit. Participants were assessed using clinical depression assessments at admission and discharge. Research staff recorded observations four times a day for 6 weeks on phones with access to the mCSDD4-MA tool. Descriptive data related to feasibility were reported (ie, completion rates). Statistical models were used to examine the interrater reliability and construct and predictive validity of the data. Results: Overall, 1923 observations were completed, representing 55.06\% (1923/3496) of all rating opportunities with 2 raters and 66.01\% (1923/2913) with at least one rater. 
Moderate interrater reliability was demonstrated for all items, except for lack of interest. Moderate correlations were observed between observers and patient-reported outcomes, where observers reported fewer symptoms relative to participants' self-reports. Several items were associated with and able to predict depression. Conclusions: The mCSDD4-MA tool was feasible to use, and most items in the tool showed moderate reliability and validity for assessing depression in dementia. Repeated and real-time depression assessment in advanced dementia holds promise for the identification of clinical depression and depressive symptoms. ", doi="10.2196/29021", url="https://aging.jmir.org/2021/3/e29021", url="http://www.ncbi.nlm.nih.gov/pubmed/34346884" } @Article{info:doi/10.2196/26540, author="Opoku Asare, Kennedy and Terhorst, Yannik and Vega, Julio and Peltonen, Ella and Lagerspetz, Eemil and Ferreira, Denzil", title="Predicting Depression From Smartphone Behavioral Markers Using Machine Learning Methods, Hyperparameter Optimization, and Feature Importance Analysis: Exploratory Study", journal="JMIR Mhealth Uhealth", year="2021", month="Jul", day="12", volume="9", number="7", pages="e26540", keywords="mHealth", keywords="mental health", keywords="mobile phone", keywords="digital biomarkers", keywords="digital phenotyping", keywords="smartphone", keywords="supervised machine learning", keywords="depression", abstract="Background: Depression is a prevalent mental health challenge. Current depression assessment methods using self-reported and clinician-administered questionnaires have limitations. Instrumenting smartphones to passively and continuously collect moment-by-moment data sets to quantify human behaviors has the potential to augment current depression assessment methods for early diagnosis, scalable, and longitudinal monitoring of depression. 
Objective: The objective of this study was to investigate the feasibility of predicting depression with human behaviors quantified from smartphone data sets, and to identify behaviors that can influence depression. Methods: Smartphone data sets and self-reported 8-item Patient Health Questionnaire (PHQ-8) depression assessments were collected from 629 participants in an exploratory longitudinal study over an average of 22.1 days (SD 17.90; range 8-86). We quantified 22 regularity, entropy, and SD behavioral markers from the smartphone data. We explored the relationship between the behavioral features and depression using correlation and bivariate linear mixed models (LMMs). We leveraged 5 supervised machine learning (ML) algorithms with hyperparameter optimization, nested cross-validation, and imbalanced data handling to predict depression. Finally, with the permutation importance method, we identified influential behavioral markers in predicting depression. Results: Of the 629 participants from at least 56 countries, 69 (10.97\%) were females, 546 (86.8\%) were males, and 14 (2.2\%) were nonbinary. Participants' age distribution is as follows: 73/629 (11.6\%) were aged between 18 and 24, 204/629 (32.4\%) were aged between 25 and 34, 156/629 (24.8\%) were aged between 35 and 44, 166/629 (26.4\%) were aged between 45 and 64, and 30/629 (4.8\%) were aged 65 years and over. Of the 1374 PHQ-8 assessments, 1143 (83.19\%) responses were nondepressed scores (PHQ-8 score <10), while 231 (16.81\%) were depressed scores (PHQ-8 score $\geq$10), as identified based on PHQ-8 cut-off. A significant positive Pearson correlation was found between screen status--normalized entropy and depression (r=0.14, P<.001). LMM demonstrates an intraclass correlation of 0.7584 and a significant positive association between screen status--normalized entropy and depression ($\beta$=.48, P=.03). 
The best ML algorithms achieved the following metrics: precision, 85.55\%-92.51\%; recall, 92.19\%-95.56\%; F1, 88.73\%-94.00\%; area under the curve receiver operating characteristic, 94.69\%-99.06\%; Cohen $\kappa$, 86.61\%-92.90\%; and accuracy, 96.44\%-98.14\%. Including age group and gender as predictors improved the ML performances. Screen and internet connectivity features were the most influential in predicting depression. Conclusions: Our findings demonstrate that behavioral markers indicative of depression can be unobtrusively identified from smartphone sensors' data. Traditional assessment of depression can be augmented with behavioral markers from smartphones for depression diagnosis and monitoring. ", doi="10.2196/26540", url="https://mhealth.jmir.org/2021/7/e26540", url="http://www.ncbi.nlm.nih.gov/pubmed/34255713" } @Article{info:doi/10.2196/25310, author="Hwang, Sung Ho and Choi, Seong-Youl", title="Development of an Android-Based Self-Report Assessment for Elderly Driving Risk (SAFE-DR) App: Mixed Methods Study", journal="JMIR Mhealth Uhealth", year="2021", month="Jun", day="17", volume="9", number="6", pages="e25310", keywords="Android driving app", keywords="driving safety", keywords="reliability", keywords="self-assessment", keywords="validity", keywords="mHealth", keywords="driving", abstract="Background: Self-report assessments for elderly drivers are used in various countries for accessible, widespread self-monitoring of driving ability in the elderly population. Likewise, in South Korea, a paper-based Self-Report Assessment for Elderly Driving Risk (SAFE-DR) has been developed. Here, we implemented the SAFE-DR through an Android app, which provides the advantages of accessibility, convenience, and provision of diverse information, and verified its reliability and validity. 
Objective: This study tested the validity and reliability of a mobile app-based version of a self-report assessment for elderly persons contextualized to the South Korean culture and compared it with a paper-based test. Methods: In this mixed methods study, we recruited and interviewed 567 elderly drivers (aged 65 years and older) between August 2018 and May 2019. For participants who provided consent, the app-based test was repeated after 2 weeks and an additional paper-based test (Driver 65 Plus test) was administered. Using the collected data, we analyzed the reliability and validity of the app-based SAFE-DR. The internal consistency of provisional items in each subdomain of the SAFE-DR and the test-retest stability were analyzed to examine reliability. Exploratory factor analysis was performed to examine the validity of the subdomain configuration. To verify the appropriateness of using an app-based test for older drivers possibly unfamiliar with mobile technology, the correlation between the results of the SAFE-DR app and the paper-based offline test was also analyzed. Results: In the reliability analysis, Cronbach $\alpha$ for all items was 0.975 and the correlation of each item with the overall score ranged from r=0.520 to r=0.823; 4 items with low correlations were removed from each of the subdomains. In the retest after 2 weeks, the mean correlation coefficient across all items was r=0.951, showing very high reliability. Exploratory factor analysis on 40 of the 44 items established 5 subdomains: on-road (8 items), coping (16 items), cognitive functions (5 items), general conditions (8 items), and medical health (3 items). A very strong negative correlation of $-$0.864 was observed between the total score for the app-based SAFE-DR and the paper-based Driver 65 Plus with decorrelation scales. The app-based test was found to be reliable. 
Conclusions: In this study, we developed an app-based self-report assessment tool for elderly drivers and tested its reliability and validity. This app can help elderly individuals easily assess their own driving skills. Therefore, this assessment can be used to educate drivers and for preventive screening for elderly drivers who want to renew their driver's licenses in South Korea. In addition, the app can contribute to safe driving among elderly drivers. ", doi="10.2196/25310", url="https://mhealth.jmir.org/2021/6/e25310", url="http://www.ncbi.nlm.nih.gov/pubmed/33934068" } @Article{info:doi/10.2196/27407, author="Yuan, Jing and Libon, J. David and Karjadi, Cody and Ang, A. Alvin F. and Devine, Sherral and Auerbach, H. Sanford and Au, Rhoda and Lin, Honghuang", title="Association Between the Digital Clock Drawing Test and Neuropsychological Test Performance: Large Community-Based Prospective Cohort (Framingham Heart Study)", journal="J Med Internet Res", year="2021", month="Jun", day="8", volume="23", number="6", pages="e27407", keywords="clock drawing test", keywords="neuropsychological test", keywords="cognition", keywords="technology", keywords="digital assessment", keywords="mild cognitive impairment", keywords="association", keywords="neurology", keywords="Framingham Heart Study", abstract="Background: The Clock Drawing Test (CDT) has been widely used in clinic for cognitive assessment. Recently, a digital Clock Drawing Test (dCDT) that is able to capture the entire sequence of clock drawing behaviors was introduced. While a variety of domain-specific features can be derived from the dCDT, it has not yet been evaluated in a large community-based population whether the features derived from the dCDT correlate with cognitive function. Objective: We aimed to investigate the association between dCDT features and cognitive performance across multiple domains. 
Methods: Participants from the Framingham Heart Study, a large community-based cohort with longitudinal cognitive surveillance, who did not have dementia were included. Participants were administered both the dCDT and a standard protocol of neuropsychological tests that measured a wide range of cognitive functions. A total of 105 features were derived from the dCDT, and their associations with 18 neuropsychological tests were assessed with linear regression models adjusted for age and sex. Associations between a composite score from dCDT features were also assessed for associations with each neuropsychological test and cognitive status (clinically diagnosed mild cognitive impairment compared to normal cognition). Results: The study included 2062 participants (age: mean 62, SD 13 years, 51.6\% women), among whom 36 were diagnosed with mild cognitive impairment. Each neuropsychological test was associated with an average of 50 dCDT features. The composite scores derived from dCDT features were significantly associated with both neuropsychological tests and mild cognitive impairment. Conclusions: The dCDT can potentially be used as a tool for cognitive assessment in large community-based populations. ", doi="10.2196/27407", url="https://www.jmir.org/2021/6/e27407", url="http://www.ncbi.nlm.nih.gov/pubmed/34100766" } @Article{info:doi/10.2196/20128, author="Fritz, Jessica and Stochl, Jan and Kievit, A. Rogier and van Harmelen, Anne-Laura and Wilkinson, O. 
Paul", title="Tracking Stress, Mental Health, and Resilience Factors in Medical Students Before, During, and After a Stress-Inducing Exam Period: Protocol and Proof-of-Principle Analyses for the RESIST Cohort Study", journal="JMIR Form Res", year="2021", month="Jun", day="8", volume="5", number="6", pages="e20128", keywords="exam stress", keywords="perceived stress", keywords="mental distress", keywords="student mental health", keywords="mental health resilience", keywords="protective factors", keywords="resilience factors", abstract="Background: Knowledge of mental distress and resilience factors over the time span from before to after a stressor is important to be able to leverage the most promising resilience factors and promote mental health at the right time. To shed light on this topic, we designed the RESIST (Resilience Study) study, in which we assessed medical students before, during, and after their yearly exam period. Exam time is generally a period of notable stress among medical students, and it has been suggested that exam time triggers mental distress. Objective: In this paper, we aim to describe the study protocol and to examine whether the exam period indeed induces higher perceived stress and mental distress. We also aim to explore whether perceived stress and mental distress coevolve in response to exams. Methods: RESIST is a cohort study in which exam stress functions as a within-subject natural stress manipulation. In this paper, we outline the sample (N=451), procedure, assessed measures (including demographics, perceived stress, mental distress, 13 resilience factors, and adversity), and ethical considerations. Moreover, we conducted a series of latent growth models and bivariate latent change score models to analyze perceived stress and mental distress changes over the 3 time points. 
Results: We found that perceived stress and mental distress increased from the time before the exams to the exam period and decreased after the exams to a lower level than before the exams. Our findings further suggest that higher mental distress before exams increased the risk of developing more perceived stress during exams. Higher perceived stress during exams, in turn, increased the risk of experiencing a less successful (or quick) recovery of mental distress after exams. Conclusions: As expected, the exam period caused a temporary increase in perceived stress and mental distress. Therefore, the RESIST study lends itself well to exploring resilience factors in response to naturally occurring exam stress. Such knowledge will eventually help researchers to find out which resilience factors lend themselves best as prevention targets and which lend themselves best as treatment targets for the mitigation of mental health problems that are triggered or accelerated by natural exam stress. The findings from the RESIST study may therefore inform student support services, mental health services, and resilience theory. ", doi="10.2196/20128", url="https://formative.jmir.org/2021/6/e20128", url="http://www.ncbi.nlm.nih.gov/pubmed/34100761" } @Article{info:doi/10.2196/23130, author="Kim, Heon Ho and An, Il Jae and Park, Rang Yu", title="A Prediction Model for Detecting Developmental Disabilities in Preschool-Age Children Through Digital Biomarker-Driven Deep Learning in Serious Games: Development Study", journal="JMIR Serious Games", year="2021", month="Jun", day="4", volume="9", number="2", pages="e23130", keywords="developmental delay", keywords="diagnosis prediction", keywords="deep learning", keywords="serious games", keywords="digital health", keywords="digital phenotyping", keywords="digital biomarkers", abstract="Background: Early detection of developmental disabilities in children is essential because early intervention can improve the prognosis of children. 
Meanwhile, a growing body of evidence has indicated a relationship between developmental disability and motor skill, and thus, motor skill is considered in the early diagnosis of developmental disability. However, there are challenges to assessing motor skill in the diagnosis of developmental disorder, such as a lack of specialists and time constraints, and thus it is commonly conducted through informal questions or surveys to parents. Objective: This study sought to evaluate the possibility of using drag-and-drop data as a digital biomarker and to develop a classification model based on drag-and-drop data with which to classify children with developmental disabilities. Methods: We collected drag-and-drop data from children with typical development and developmental disabilities from May 1, 2018, to May 1, 2020, via a mobile application (DoBrain). We used touch coordinates and extracted kinetic variables from these coordinates. A deep learning algorithm was developed to predict potential development disabilities in children. For interpretability of the model results, we identified which coordinates contributed to the classification results by applying gradient-weighted class activation mapping. Results: Of the 370 children in the study, 223 had typical development, and 147 had developmental disabilities. In all games, the number of changes in the acceleration sign based on the direction of progress both in the x- and y-axes showed significant differences between the 2 groups (P<.001; effect size >0.5). The deep learning convolutional neural network model showed that drag-and-drop data can help diagnose developmental disabilities, with an area under the receiving operating characteristics curve of 0.817. A gradient class activation map, which can interpret the results of a deep learning model, was visualized with the game results for specific children. 
Conclusions: Through the results of the deep learning model, we confirmed that drag-and-drop data can be a new digital biomarker for the diagnosis of developmental disabilities. ", doi="10.2196/23130", url="https://games.jmir.org/2021/2/e23130", url="http://www.ncbi.nlm.nih.gov/pubmed/34085944" }

@Article{info:doi/10.2196/25520,
  author="McMurray, Josephine and Levy, AnneMarie and Holyoke, Paul",
  title="Psychometric Evaluation and Workflow Integration Study of a Tablet-Based Tool to Detect Mild Cognitive Impairment in Older Adults: Protocol for a Mixed Methods Study",
  journal="JMIR Res Protoc",
  year="2021",
  month="May",
  day="21",
  volume="10",
  number="5",
  pages="e25520",
  keywords="cognitive dysfunction",
  keywords="dementia",
  keywords="neuropsychological tests",
  keywords="evaluation study",
  keywords="technology",
  keywords="aged",
  keywords="primary health care",
  abstract="Background: With the rapid aging of the global population, experts anticipate a surge in the prevalence of mild cognitive impairment (MCI) and dementia worldwide. It is argued that developing more sensitive, easy to administer, and valid MCI screening tools for use in primary care settings may initiate timely clinical and personal care planning and treatment, enabling early access to programs and services. Including functional competence measures in screening tests makes them more ecologically valid and may help to identify cognitive deficits at an earlier stage. Objective: We aim to conduct a preliminary evaluative study comparing the sensitivity, specificity, and reliability of the BrainFx Screen (referred to as SCREEN hereafter), a novel digital tool designed to assess functional competence and detect early signs of cognitive impairment, with the Quick Mild Cognitive Impairment, a validated and highly sensitive tool that detects MCI in the older adult population. We will also investigate the perceived usefulness and integration of the SCREEN into primary care practice to identify demonstrable impacts on clinical workflow and health care providers' (HCP) perceptions of its success as a screening tool. Patients' perceptions of completing the SCREEN and its impact on their quality of life will also be explored. Methods: This study has a concurrent, mixed methods, prospective, and quasi-experimental design. Participants will be recruited from 5 primary care family health teams (FHTs; defined by multidisciplinary practice and capitated funding) across southwestern Ontario, Canada. Participants will include HCPs, patients, care partners, and FHT administrative executives. Patients 55 years and older with no history of diagnoses for MCI, dementia, or Alzheimer disease rostered in one of the FHTs participating in the study will be eligible to participate. Their care partners will help triangulate the qualitative data collected from patients. Participating FHTs will identify an occupational therapist from their site to participate in the study; this HCP will both administer the research protocol and participate in semistructured in-depth interviews and questionnaires. Principal component analysis will be conducted on the SCREEN data to understand the test components better. Tests comparing sensitivity, specificity, and test-retest reliability will assess the validity of SCREEN as a screening tool for MCI. Results: This paper describes the study protocol and its activities to date. Data collection was halted early because of COVID-19 restrictions on research activity, and data analysis is currently in progress. Conclusions: At the end of the project, we anticipate having an initial comparative evaluation of the SCREEN as a tool for early detection of MCI in primary care older adult patient populations. Resource constraints on this research study limit our ability to conduct a randomized controlled trial; however, the results will assist developers of the SCREEN in determining whether rigorous controlled testing is warranted. International Registered Report Identifier (IRRID): DERR1-10.2196/25520 ",
  doi="10.2196/25520",
  url="https://www.researchprotocols.org/2021/5/e25520",
  url="http://www.ncbi.nlm.nih.gov/pubmed/34018966"
}

@Article{info:doi/10.2196/27113,
  author="Jin, Haomiao and Chien, Sandy and Meijer, Erik and Khobragade, Pranali and Lee, Jinkook",
  title="Learning From Clinical Consensus Diagnosis in India to Facilitate Automatic Classification of Dementia: Machine Learning Study",
  journal="JMIR Ment Health",
  year="2021",
  month="May",
  day="10",
  volume="8",
  number="5",
  pages="e27113",
  keywords="dementia",
  keywords="Alzheimer disease",
  keywords="machine learning",
  keywords="artificial intelligence",
  keywords="diagnosis",
  keywords="classification",
  keywords="India",
  keywords="model",
  abstract="Background: The Harmonized Diagnostic Assessment of Dementia for the Longitudinal Aging Study in India (LASI-DAD) is the first and only nationally representative study on late-life cognition and dementia in India (n=4096). LASI-DAD obtained clinical consensus diagnosis of dementia for a subsample of 2528 respondents. Objective: This study develops a machine learning model that uses data from the clinical consensus diagnosis in LASI-DAD to support the classification of dementia status. Methods: Clinicians were presented with the extensive data collected from LASI-DAD, including sociodemographic information and health history of respondents, results from the screening tests of cognitive status, and information obtained from informant interviews. Based on the Clinical Dementia Rating (CDR) and using an online platform, clinicians individually evaluated each case and then reached a consensus diagnosis. A 2-step procedure was implemented to train several candidate machine learning models, which were evaluated using a separate test set for predictive accuracy measurement, including the area under receiver operating curve (AUROC), accuracy, sensitivity, specificity, precision, F1 score, and kappa statistic. The ultimate model was selected based on overall agreement as measured by kappa. We further examined the overall accuracy and agreement with the final consensus diagnoses between the selected machine learning model and individual clinicians who participated in the clinical consensus diagnostic process. Finally, we applied the selected model to a subgroup of LASI-DAD participants for whom the clinical consensus diagnosis was not obtained to predict their dementia status. Results: Among the 2528 individuals who received clinical consensus diagnosis, 192 (6.7\% after adjusting for sampling weight) were diagnosed with dementia. All candidate machine learning models achieved outstanding discriminative ability, as indicated by AUROC >.90, and had similar accuracy and specificity (both around 0.95). The support vector machine model outperformed other models with the highest sensitivity (0.81), F1 score (0.72), and kappa (.70, indicating substantial agreement) and the second highest precision (0.65). As a result, the support vector machine was selected as the ultimate model. Further examination revealed that overall accuracy and agreement were similar between the selected model and individual clinicians. Application of the prediction model on 1568 individuals without clinical consensus diagnosis classified 127 individuals as living with dementia. After applying sampling weight, we can estimate the prevalence of dementia in the population as 7.4\%. Conclusions: The selected machine learning model has outstanding discriminative ability and substantial agreement with a clinical consensus diagnosis of dementia. The model can serve as a computer model of the clinical knowledge and experience encoded in the clinical consensus diagnostic process and has many potential applications, including predicting missed dementia diagnoses and serving as a clinical decision support tool or virtual rater to assist diagnosis of dementia. ",
  doi="10.2196/27113",
  url="https://mental.jmir.org/2021/5/e27113",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33970122"
}

@Article{info:doi/10.2196/20865,
  author="Manas, Gaur and Aribandi, Vamsi and Kursuncu, Ugur and Alambo, Amanuel and Shalin, L. Valerie and Thirunarayan, Krishnaprasad and Beich, Jonathan and Narasimhan, Meera and Sheth, Amit",
  title="Knowledge-Infused Abstractive Summarization of Clinical Diagnostic Interviews: Framework Development Study",
  journal="JMIR Ment Health",
  year="2021",
  month="May",
  day="10",
  volume="8",
  number="5",
  pages="e20865",
  keywords="knowledge-infusion",
  keywords="abstractive summarization",
  keywords="distress clinical diagnostic interviews",
  keywords="Patient Health Questionnaire-9",
  keywords="healthcare informatics",
  keywords="interpretable evaluations",
  abstract="Background: In clinical diagnostic interviews, mental health professionals (MHPs) implement a care practice that involves asking open questions (eg, ``What do you want from your life?'' ``What have you tried before to bring change in your life?'') while listening empathetically to patients. During these interviews, MHPs attempted to build a trusting human-centered relationship while collecting data necessary for professional medical and psychiatric care. Often, because of the social stigma of mental health disorders, patient discomfort in discussing their presenting problem may add additional complexities and nuances to the language they use, that is, hidden signals among noisy content. Therefore, a focused, well-formed, and elaborative summary of clinical interviews is critical to MHPs in making informed decisions by enabling a more profound exploration of a patient's behavior, especially when it endangers life. Objective: The aim of this study is to propose an unsupervised, knowledge-infused abstractive summarization (KiAS) approach that generates summaries to enable MHPs to perform a well-informed follow-up with patients to improve the existing summarization methods built on frequency heuristics by creating more informative summaries. Methods: Our approach incorporated domain knowledge from the Patient Health Questionnaire-9 lexicon into an integer linear programming framework that optimizes linguistic quality and informativeness. We used 3 baseline approaches: extractive summarization using the SumBasic algorithm, abstractive summarization using integer linear programming without the infusion of knowledge, and abstraction over extractive summarization to evaluate the performance of KiAS. The capability of KiAS on the Distress Analysis Interview Corpus-Wizard of Oz data set was demonstrated through interpretable qualitative and quantitative evaluations. Results: KiAS generates summaries (7 sentences on average) that capture informative questions and responses exchanged during long (58 sentences on average), ambiguous, and sparse clinical diagnostic interviews. The summaries generated using KiAS improved upon the 3 baselines by 23.3\%, 4.4\%, 2.5\%, and 2.2\% for thematic overlap, Flesch Reading Ease, contextual similarity, and Jensen Shannon divergence, respectively. On the Recall-Oriented Understudy for Gisting Evaluation-2 and Recall-Oriented Understudy for Gisting Evaluation-L metrics, KiAS showed an improvement of 61\% and 49\%, respectively. We validated the quality of the generated summaries through visual inspection and substantial interrater agreement from MHPs. Conclusions: Our collaborator MHPs observed the potential utility and significant impact of KiAS in leveraging valuable but voluminous communications that take place outside of normally scheduled clinical appointments. This study shows promise in generating semantically relevant summaries that will help MHPs make informed decisions about patient status. ",
  doi="10.2196/20865",
  url="https://mental.jmir.org/2021/5/e20865",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33970116"
}

@Article{info:doi/10.2196/15708,
  author="Le Glaz, Aziliz and Haralambous, Yannis and Kim-Dufor, Deok-Hee and Lenca, Philippe and Billot, Romain and Ryan, C. Taylor and Marsh, Jonathan and DeVylder, Jordan and Walter, Michel and Berrouiguet, Sofian and Lemey, Christophe",
  title="Machine Learning and Natural Language Processing in Mental Health: Systematic Review",
  journal="J Med Internet Res",
  year="2021",
  month="May",
  day="4",
  volume="23",
  number="5",
  pages="e15708",
  keywords="machine learning",
  keywords="natural language processing",
  keywords="artificial intelligence",
  keywords="data mining",
  keywords="mental health",
  keywords="psychiatry",
  abstract="Background: Machine learning systems are part of the field of artificial intelligence that automatically learn models from data to make better decisions. Natural language processing (NLP), by using corpora and learning approaches, provides good performance in statistical tasks, such as text classification or sentiment mining. Objective: The primary aim of this systematic review was to summarize and characterize, in methodological and technical terms, studies that used machine learning and NLP techniques for mental health. The secondary aim was to consider the potential use of these methods in mental health clinical practice. Methods: This systematic review follows the PRISMA (Preferred Reporting Items for Systematic Review and Meta-analysis) guidelines and is registered with PROSPERO (Prospective Register of Systematic Reviews; number CRD42019107376). The search was conducted using 4 medical databases (PubMed, Scopus, ScienceDirect, and PsycINFO) with the following keywords: machine learning, data mining, psychiatry, mental health, and mental disorder. The exclusion criteria were as follows: languages other than English, anonymization process, case studies, conference papers, and reviews. No limitations on publication dates were imposed. Results: A total of 327 articles were identified, of which 269 (82.3\%) were excluded and 58 (17.7\%) were included in the review. The results were organized through a qualitative perspective. Although studies had heterogeneous topics and methods, some themes emerged. Population studies could be grouped into 3 categories: patients included in medical databases, patients who came to the emergency room, and social media users. The main objectives were to extract symptoms, classify severity of illness, compare therapy effectiveness, provide psychopathological clues, and challenge the current nosography. Medical records and social media were the 2 major data sources. With regard to the methods used, preprocessing used the standard methods of NLP and unique identifier extraction dedicated to medical texts. Efficient classifiers were preferred rather than transparent functioning classifiers. Python was the most frequently used platform. Conclusions: Machine learning and NLP models have been highly topical issues in medicine in recent years and may be considered a new paradigm in medical research. However, these processes tend to confirm clinical hypotheses rather than developing entirely new information, and only one major category of the population (ie, social media users) is an imprecise cohort. Moreover, some language-specific features can improve the performance of NLP methods, and their extension to other languages should be more closely investigated. However, machine learning and NLP techniques provide useful information from unexplored data (ie, patients' daily habits that are usually inaccessible to care providers). Before considering it as an additional tool of mental health care, ethical issues remain and should be discussed in a timely manner. Machine learning and NLP methods may offer multiple perspectives in mental health research but should also be considered as tools to support clinical practice. ",
  doi="10.2196/15708",
  url="https://www.jmir.org/2021/5/e15708",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33944788"
}

@Article{info:doi/10.2196/27667,
  author="Yamada, Yasunori and Shinkawa, Kaoru and Kobayashi, Masatomo and Takagi, Hironobu and Nemoto, Miyuki and Nemoto, Kiyotaka and Arai, Tetsuaki",
  title="Using Speech Data From Interactions With a Voice Assistant to Predict the Risk of Future Accidents for Older Drivers: Prospective Cohort Study",
  journal="J Med Internet Res",
  year="2021",
  month="Apr",
  day="8",
  volume="23",
  number="4",
  pages="e27667",
  keywords="cognitive impairment",
  keywords="smart speaker",
  keywords="speech analysis",
  keywords="accident",
  keywords="prevention",
  keywords="older adults",
  keywords="prediction",
  keywords="risk",
  keywords="assistant",
  abstract="Background: With the rapid growth of the older adult population worldwide, car accidents involving this population group have become an increasingly serious problem. Cognitive impairment, which is assessed using neuropsychological tests, has been reported as a risk factor for being involved in car accidents; however, it remains unclear whether this risk can be predicted using daily behavior data. Objective: The objective of this study was to investigate whether speech data that can be collected in everyday life can be used to predict the risk of an older driver being involved in a car accident. Methods: At baseline, we collected (1) speech data during interactions with a voice assistant and (2) cognitive assessment data---neuropsychological tests (Mini-Mental State Examination, revised Wechsler immediate and delayed logical memory, Frontal Assessment Battery, trail making test-parts A and B, and Clock Drawing Test), Geriatric Depression Scale, magnetic resonance imaging, and demographics (age, sex, education)---from older adults. Approximately one-and-a-half years later, we followed up to collect information about their driving experiences (with respect to car accidents) using a questionnaire. We investigated the association between speech data and future accident risk using statistical analysis and machine learning models. Results: We found that older drivers (n=60) with accident or near-accident experiences had statistically discernible differences in speech features that suggest cognitive impairment such as reduced speech rate (P=.048) and increased response time (P=.040). Moreover, the model that used speech features could predict future accident or near-accident experiences with 81.7\% accuracy, which was 6.7\% higher than that using cognitive assessment data, and could achieve up to 88.3\% accuracy when the model used both types of data. Conclusions: Our study provides the first empirical results that suggest analysis of speech data recorded during interactions with voice assistants could help predict future accident risk for older drivers by capturing subtle impairments in cognitive function. ",
  doi="10.2196/27667",
  url="https://www.jmir.org/2021/4/e27667",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33830066"
}

@Article{info:doi/10.2196/24754,
  author="Wang, Haishuai and Avillach, Paul",
  title="Diagnostic Classification and Prognostic Prediction Using Common Genetic Variants in Autism Spectrum Disorder: Genotype-Based Deep Learning",
  journal="JMIR Med Inform",
  year="2021",
  month="Apr",
  day="7",
  volume="9",
  number="4",
  pages="e24754",
  keywords="deep learning",
  keywords="autism spectrum disorder",
  keywords="common genetic variants",
  keywords="diagnostic classification",
  abstract="Background: In the United States, about 3 million people have autism spectrum disorder (ASD), and around 1 out of 59 children are diagnosed with ASD. People with ASD have characteristic social communication deficits and repetitive behaviors. The causes of this disorder remain unknown; however, in up to 25\% of cases, a genetic cause can be identified. Detecting ASD as early as possible is desirable because early detection of ASD enables timely interventions in children with ASD. Identification of ASD based on objective pathogenic mutation screening is the major first step toward early intervention and effective treatment of affected children. Objective: Recent investigation interrogated genomics data for detecting and treating autism disorders, in addition to the conventional clinical interview as a diagnostic test. Since deep neural networks perform better than shallow machine learning models on complex and high-dimensional data, in this study, we sought to apply deep learning to genetic data obtained across thousands of simplex families at risk for ASD to identify contributory mutations and to create an advanced diagnostic classifier for autism screening. Methods: After preprocessing the genomics data from the Simons Simplex Collection, we extracted top ranking common variants that may be protective or pathogenic for autism based on a chi-square test. A convolutional neural network--based diagnostic classifier was then designed using the identified significant common variants to predict autism. The performance was then compared with shallow machine learning--based classifiers and randomly selected common variants. Results: The selected contributory common variants were significantly enriched in chromosome X while chromosome Y was also discriminatory in determining the identification of autistic individuals from nonautistic individuals. The ARSD, MAGEB16, and MXRA5 genes had the largest effect in the contributory variants. Thus, screening algorithms were adapted to include these common variants. The deep learning model yielded an area under the receiver operating characteristic curve of 0.955 and an accuracy of 88\% for identifying autistic individuals from nonautistic individuals. Our classifier demonstrated a considerable improvement of {\textasciitilde}13\% in terms of classification accuracy compared to standard autism screening tools. Conclusions: Common variants are informative for autism identification. Our findings also suggest that the deep learning process is a reliable method for distinguishing the diseased group from the control group based on the common variants of autism. ",
  doi="10.2196/24754",
  url="https://medinform.jmir.org/2021/4/e24754",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33714937"
}

@Article{info:doi/10.2196/24727,
  author="Zeghari, Radia and K{\"o}nig, Alexandra and Guerchouche, Rachid and Sharma, Garima and Joshi, Jyoti and Fabre, Roxane and Robert, Philippe and Manera, Valeria",
  title="Correlations Between Facial Expressivity and Apathy in Elderly People With Neurocognitive Disorders: Exploratory Study",
  journal="JMIR Form Res",
  year="2021",
  month="Mar",
  day="31",
  volume="5",
  number="3",
  pages="e24727",
  keywords="apathy",
  keywords="action units",
  keywords="assessment",
  keywords="ICT",
  keywords="facial video analysis",
  keywords="neurocognitive disorders",
  keywords="neurocognitive",
  keywords="facial analysis",
  abstract="Background: Neurocognitive disorders are often accompanied by behavioral symptoms such as anxiety, depression, and/or apathy. These symptoms can occur very early in the disease progression and are often difficult to detect and quantify in nonspecialized clinical settings. Objective: We focus in this study on apathy, one of the most common and debilitating neuropsychiatric symptoms in neurocognitive disorders. Specifically, we investigated whether facial expressivity extracted through computer vision software correlates with the severity of apathy symptoms in elderly subjects with neurocognitive disorders. Methods: A total of 63 subjects (38 females and 25 males) with neurocognitive disorder participated in the study. Apathy was assessed using the Apathy Inventory (AI), a scale comprising 3 domains of apathy: loss of interest, loss of initiation, and emotional blunting. The higher the scale score, the more severe the apathy symptoms. Participants were asked to recall a positive and a negative event of their life, while their voice and face were recorded using a tablet device. Action units (AUs), which are basic facial movements, were extracted using OpenFace 2.0. A total of 17 AUs (intensity and presence) for each frame of the video were extracted in both positive and negative storytelling. Average intensity and frequency of AU activation were calculated for each participant in each video. Partial correlations (controlling for the level of depression and cognitive impairment) were performed between these indexes and AI subscales. Results: Results showed that AU intensity and frequency were negatively correlated with apathy scale scores, in particular with the emotional blunting component. The more severe the apathy symptoms, the less expressivity in specific emotional and nonemotional AUs was displayed from participants while recalling an emotional event. Different AUs showed significant correlations depending on the sex of the participant and the task's valence (positive vs negative story), suggesting the importance of assessing male and female participants independently. Conclusions: Our study suggests the interest of employing computer vision-based facial analysis to quantify facial expressivity and assess the severity of apathy symptoms in subjects with neurocognitive disorders. This may represent a useful tool for a preliminary apathy assessment in nonspecialized settings and could be used to complement classical clinical scales. Future studies including larger samples should confirm the clinical relevance of this kind of instrument. ",
  doi="10.2196/24727",
  url="https://formative.jmir.org/2021/3/e24727",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33787499"
}

@Article{info:doi/10.2196/24465,
  author="S{\"u}kei, Emese and Norbury, Agnes and Perez-Rodriguez, Mercedes M. and Olmos, M. Pablo and Art{\'e}s, Antonio",
  title="Predicting Emotional States Using Behavioral Markers Derived From Passively Sensed Data: Data-Driven Machine Learning Approach",
  journal="JMIR Mhealth Uhealth",
  year="2021",
  month="Mar",
  day="22",
  volume="9",
  number="3",
  pages="e24465",
  keywords="mental health",
  keywords="affect",
  keywords="mobile health",
  keywords="mobile phone",
  keywords="digital phenotype",
  keywords="machine learning",
  keywords="Bayesian analysis",
  keywords="probabilistic models",
  keywords="personalized models",
  abstract="Background: Mental health disorders affect multiple aspects of patients' lives, including mood, cognition, and behavior. eHealth and mobile health (mHealth) technologies enable rich sets of information to be collected noninvasively, representing a promising opportunity to construct behavioral markers of mental health. Combining such data with self-reported information about psychological symptoms may provide a more comprehensive and contextualized view of a patient's mental state than questionnaire data alone. However, mobile sensed data are usually noisy and incomplete, with significant amounts of missing observations. Therefore, recognizing the clinical potential of mHealth tools depends critically on developing methods to cope with such data issues. Objective: This study aims to present a machine learning--based approach for emotional state prediction that uses passively collected data from mobile phones and wearable devices and self-reported emotions. The proposed methods must cope with high-dimensional and heterogeneous time-series data with a large percentage of missing observations. Methods: Passively sensed behavior and self-reported emotional state data from a cohort of 943 individuals (outpatients recruited from community clinics) were available for analysis. All patients had at least 30 days' worth of naturally occurring behavior observations, including information about physical activity, geolocation, sleep, and smartphone app use. These regularly sampled but frequently missing and heterogeneous time series were analyzed with the following probabilistic latent variable models for data averaging and feature extraction: mixture model (MM) and hidden Markov model (HMM). The extracted features were then combined with a classifier to predict emotional state. A variety of classical machine learning methods and recurrent neural networks were compared. Finally, a personalized Bayesian model was proposed to improve performance by considering the individual differences in the data and applying a different classifier bias term for each patient. Results: Probabilistic generative models proved to be good preprocessing and feature extractor tools for data with large percentages of missing observations. Models that took into account the posterior probabilities of the MM and HMM latent states outperformed those that did not by more than 20\%, suggesting that the underlying behavioral patterns identified were meaningful for individuals' overall emotional state. The best performing generalized models achieved a 0.81 area under the curve of the receiver operating characteristic and 0.71 area under the precision-recall curve when predicting self-reported emotional valence from behavior in held-out test data. Moreover, the proposed personalized models demonstrated that accounting for individual differences through a simple hierarchical model can substantially improve emotional state prediction performance without relying on previous days' data. Conclusions: These findings demonstrate the feasibility of designing machine learning models for predicting emotional states from mobile sensing data capable of dealing with heterogeneous data with large numbers of missing observations. Such models may represent valuable tools for clinicians to monitor patients' mood states. ",
  doi="10.2196/24465",
  url="https://mhealth.jmir.org/2021/3/e24465",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33749612"
}

@Article{info:doi/10.2196/23456,
  author="Ridgway, P. Jessica and Uvin, Arno and Schmitt, Jessica and Oliwa, Tomasz and Almirol, Ellen and Devlin, Samantha and Schneider, John",
  title="Natural Language Processing of Clinical Notes to Identify Mental Illness and Substance Use Among People Living with HIV: Retrospective Cohort Study",
  journal="JMIR Med Inform",
  year="2021",
  month="Mar",
  day="10",
  volume="9",
  number="3",
  pages="e23456",
  keywords="natural language processing",
  keywords="HIV",
  keywords="substance use",
  keywords="mental illness",
  keywords="electronic medical records",
  abstract="Background: Mental illness and substance use are prevalent among people living with HIV and often lead to poor health outcomes. Electronic medical record (EMR) data are increasingly being utilized for HIV-related clinical research and care, but mental illness and substance use are often underdocumented in structured EMR fields. Natural language processing (NLP) of unstructured text of clinical notes in the EMR may more accurately identify mental illness and substance use among people living with HIV than structured EMR fields alone. Objective: The aim of this study was to utilize NLP of clinical notes to detect mental illness and substance use among people living with HIV and to determine how often these factors are documented in structured EMR fields. Methods: We collected both structured EMR data (diagnosis codes, social history, Problem List) as well as the unstructured text of clinical HIV care notes for adults living with HIV. We developed NLP algorithms to identify words and phrases associated with mental illness and substance use in the clinical notes. The algorithms were validated based on chart review. We compared numbers of patients with documentation of mental illness or substance use identified by structured EMR fields with those identified by the NLP algorithms. Results: The NLP algorithm for detecting mental illness had a positive predictive value (PPV) of 98\% and a negative predictive value (NPV) of 98\%. The NLP algorithm for detecting substance use had a PPV of 92\% and an NPV of 98\%. The NLP algorithm for mental illness identified 54.0\% (420/778) of patients as having documentation of mental illness in the text of clinical notes. Among the patients with mental illness detected by NLP, 58.6\% (246/420) had documentation of mental illness in at least one structured EMR field. Sixty-three patients had documentation of mental illness in structured EMR fields that was not detected by NLP of clinical notes. The NLP algorithm for substance use detected substance use in the text of clinical notes in 18.1\% (141/778) of patients. Among patients with substance use detected by NLP, 73.8\% (104/141) had documentation of substance use in at least one structured EMR field. Seventy-six patients had documentation of substance use in structured EMR fields that was not detected by NLP of clinical notes. Conclusions: Among patients in an urban HIV care clinic, NLP of clinical notes identified high rates of mental illness and substance use that were often not documented in structured EMR fields. This finding has important implications for epidemiologic research and clinical care for people living with HIV. ",
  doi="10.2196/23456",
  url="https://medinform.jmir.org/2021/3/e23456",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33688848"
}

@Article{info:doi/10.2196/24365,
  author="Bai, Ran and Xiao, Le and Guo, Yu and Zhu, Xuequan and Li, Nanxi and Wang, Yashen and Chen, Qinqin and Feng, Lei and Wang, Yinghua and Yu, Xiangyi and Wang, Chunxue and Hu, Yongdong and Liu, Zhandong and Xie, Haiyong and Wang, Gang",
  title="Tracking and Monitoring Mood Stability of Patients With Major Depressive Disorder by Machine Learning Models Using Passive Digital Data: Prospective Naturalistic Multicenter Study",
  journal="JMIR Mhealth Uhealth",
  year="2021",
  month="Mar",
  day="8",
  volume="9",
  number="3",
  pages="e24365",
  keywords="digital phenotype",
  keywords="major depressive disorder",
  keywords="machine learning",
  keywords="mobile phone",
  abstract="Background: Major depressive disorder (MDD) is a common mental illness characterized by persistent sadness and a loss of interest in activities. Using smartphones and wearable devices to monitor the mental condition of patients with MDD has been examined in several studies. However, few studies have used passively collected data to monitor mood changes over time. Objective: The aim of this study is to examine the feasibility of monitoring mood status and stability of patients with MDD using machine learning models trained by passively collected data, including phone use data, sleep data, and step count data. Methods: We constructed 950 data samples representing time spans during three consecutive Patient Health Questionnaire-9 assessments. Each data sample was labeled as Steady or Mood Swing, with subgroups Steady-remission, Steady-depressed, Mood Swing-drastic, and Mood Swing-moderate based on patients' Patient Health Questionnaire-9 scores from three visits. A total of 252 features were extracted, and 4 feature selection models were applied; 6 different combinations of types of data were experimented with using 6 different machine learning models. Results: A total of 334 participants with MDD were enrolled in this study. The highest average accuracy of classification between Steady and Mood Swing was 76.67\% (SD 8.47\%) and that of recall was 90.44\% (SD 6.93\%), with features from all types of data being used. Among the 6 combinations of types of data we experimented with, the overall best combination was using call logs, sleep data, step count data, and heart rate data. The accuracies of predicting between Steady-remission and Mood Swing-drastic, Steady-remission and Mood Swing-moderate, and Steady-depressed and Mood Swing-drastic were over 80\%, and the accuracy of predicting between Steady-depressed and Mood Swing-moderate and the overall Steady to Mood Swing classification accuracy were over 75\%. Comparing all 6 aforementioned combinations, we found that the overall prediction accuracies between Steady-remission and Mood Swing (drastic and moderate) are better than those between Steady-depressed and Mood Swing (drastic and moderate). Conclusions: Our proposed method could be used to monitor mood changes in patients with MDD with promising accuracy by using passively collected data, which can be used as a reference by doctors for adjusting treatment plans or for warning patients and their guardians of a relapse. Trial Registration: Chinese Clinical Trial Registry ChiCTR1900021461; http://www.chictr.org.cn/showprojen.aspx?proj=36173 ",
  doi="10.2196/24365",
  url="https://mhealth.jmir.org/2021/3/e24365",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33683207"
}

@Article{info:doi/10.2196/26360,
  author="Jones, Chelsea and Harasym, Jessica and Miguel-Cruz, Antonio and Chisholm, Shannon and Smith-MacDonald, Lorraine and Br{\'e}mault-Phillips, Suzette",
  title="Neurocognitive Assessment Tools for Military Personnel With Mild Traumatic Brain Injury: Scoping Literature Review",
  journal="JMIR Ment Health",
  year="2021",
  month="Feb",
  day="22",
  volume="8",
  number="2",
  pages="e26360",
  keywords="military",
  keywords="rehabilitation",
  keywords="head injury",
  keywords="posttraumatic stress disorder",
  keywords="cognition",
  keywords="neurocognitive assessment tool",
  keywords="traumatic brain injury",
  keywords="assessment",
  keywords="brain concussion",
  keywords="mobile phone",
  abstract="Background: Mild traumatic brain injury (mTBI) occurs at a higher frequency among military personnel than among civilians. A common symptom of mTBIs is cognitive dysfunction. Health care professionals use neuropsychological assessments as part of a multidisciplinary and best practice approach for mTBI management. Such assessments support clinical diagnosis, symptom management, rehabilitation, and return-to-duty planning. Military health care organizations currently use computerized neurocognitive assessment tools (NCATs). NCATs and more traditional neuropsychological assessments present unique challenges in both clinical and military settings. Many research gaps remain regarding psychometric properties, usability, acceptance, feasibility, effectiveness, sensitivity, and utility of both types of assessments in military environments. Objective: The aims of this study were to explore evidence regarding the use of NCATs among military personnel who have sustained mTBIs; evaluate the psychometric properties of the most commonly tested NCATs for this population; and synthesize the data to explore the range and extent of NCATs among this population, clinical recommendations for use, and knowledge gaps requiring future research. Methods: Studies were identified using MEDLINE, Embase, American Psychological Association PsycINFO, CINAHL Plus with Full Text, Psych Article, Scopus, and Military \& Government Collection. Data were analyzed using descriptive analysis, thematic analysis, and the Randolph Criteria. Narrative synthesis and the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-analyses extension for Scoping Reviews) guided the reporting of findings. The psychometric properties of NCATs were evaluated with specific criteria and summarized. Results: Of the 104 papers, 33 met the inclusion criteria for this scoping review. Thematic analysis and NCAT psychometrics were reported and summarized. Conclusions: When considering the psychometric properties of the most commonly used NCATs in military populations, these assessments have yet to demonstrate adequate validity, reliability, sensitivity, and clinical utility among military personnel with mTBIs. Additional research is needed to further validate NCATs within military populations, especially for those living outside of the United States and individuals experiencing other conditions known to adversely affect cognitive processing. Knowledge gaps remain, warranting further study of psychometric properties and the utility of baseline and normative testing for NCATs. ",
  doi="10.2196/26360",
  url="https://mental.jmir.org/2021/2/e26360",
  url="http://www.ncbi.nlm.nih.gov/pubmed/33616538"
}

@Article{info:doi/10.2196/23813, author="Mirea, Dan-Mircea and Martin-Key, A. Nayra and Barton-Owen, Giles and Olmert, Tony and Cooper, D. 
Jason and Han, Sarah Sung Yeon and Farrag, P. Lynn and Bell, Emily and Friend, V. Lauren and Eljasz, Pawel and Cowell, Daniel and Tomasik, Jakub and Bahn, Sabine", title="Impact of a Web-Based Psychiatric Assessment on the Mental Health and Well-Being of Individuals Presenting With Depressive Symptoms: Longitudinal Observational Study", journal="JMIR Ment Health", year="2021", month="Feb", day="22", volume="8", number="2", pages="e23813", keywords="online assessment", keywords="mental health", keywords="e-health", keywords="digital diagnosis", keywords="mood disorders", keywords="bipolar disorder", keywords="major depressive disorder", abstract="Background: Web-based assessments of mental health concerns hold great potential for earlier, more cost-effective, and more accurate diagnoses of psychiatric conditions than that achieved with traditional interview-based methods. Objective: The aim of this study was to assess the impact of a comprehensive web-based mental health assessment on the mental health and well-being of over 2000 individuals presenting with symptoms of depression. Methods: Individuals presenting with depressive symptoms completed a web-based assessment that screened for mood and other psychiatric conditions. After completing the assessment, the study participants received a report containing their assessment results along with personalized psychoeducation. After 6 and 12 months, participants were asked to rate the usefulness of the web-based assessment on different mental health--related outcomes and to self-report on their recent help-seeking behavior, diagnoses, medication, and lifestyle changes. In addition, general mental well-being was assessed at baseline and both follow-ups using the Warwick-Edinburgh Mental Well-being Scale (WEMWBS). Results: Data from all participants who completed either the 6-month or the 12-month follow-up (N=2064) were analyzed. 
The majority of study participants rated the study as useful for their subjective mental well-being. This included talking more openly (1314/1939, 67.77\%) and understanding one's mental health problems better (1083/1939, 55.85\%). Although most participants (1477/1939, 76.17\%) found their assessment results useful, only a small proportion (302/2064, 14.63\%) subsequently discussed them with a mental health professional, leading to only a small number of study participants receiving a new diagnosis (110/2064, 5.33\%). Among those who were reviewed, new mood disorder diagnoses were predicted by the digital algorithm with high sensitivity (above 70\%), and nearly half of the participants with new diagnoses also had a corresponding change in medication. Furthermore, participants' subjective well-being significantly improved over 12 months (baseline WEMWBS score: mean 35.24, SD 8.11; 12-month WEMWBS score: mean 41.19, SD 10.59). Significant positive predictors of follow-up subjective well-being included talking more openly, exercising more, and having been reviewed by a psychiatrist. Conclusions: Our results suggest that completing a web-based mental health assessment and receiving personalized psychoeducation are associated with subjective mental health improvements, facilitated by increased self-awareness and subsequent use of self-help interventions. Integrating web-based mental health assessments within primary and/or secondary care services could benefit patients further and expedite earlier diagnosis and effective treatment. International Registered Report Identifier (IRRID): RR2-10.2196/18453 ", doi="10.2196/23813", url="https://mental.jmir.org/2021/2/e23813", url="http://www.ncbi.nlm.nih.gov/pubmed/33616546" } @Article{info:doi/10.2196/25456, author="Friis-Healy, A. Elsa and Nagy, A. Gabriela and Kollins, H. 
Scott", title="It Is Time to REACT: Opportunities for Digital Mental Health Apps to Reduce Mental Health Disparities in Racially and Ethnically Minoritized Groups", journal="JMIR Ment Health", year="2021", month="Jan", day="26", volume="8", number="1", pages="e25456", keywords="digital health", keywords="app", keywords="public mental health", keywords="health disparities", keywords="COVID-19", keywords="pandemic", keywords="mental health", keywords="disparity", keywords="behavior", doi="10.2196/25456", url="http://mental.jmir.org/2021/1/e25456/", url="http://www.ncbi.nlm.nih.gov/pubmed/33406050" } @Article{info:doi/10.2196/25018, author="Sobolev, Michael and Vitale, Rachel and Wen, Hongyi and Kizer, James and Leeman, Robert and Pollak, P. J. and Baumel, Amit and Vadhan, P. Nehal and Estrin, Deborah and Muench, Frederick", title="The Digital Marshmallow Test (DMT) Diagnostic and Monitoring Mobile Health App for Impulsive Behavior: Development and Validation Study", journal="JMIR Mhealth Uhealth", year="2021", month="Jan", day="22", volume="9", number="1", pages="e25018", keywords="impulse control", keywords="impulsivity", keywords="self-regulation", keywords="self-control", keywords="mobile health", keywords="mHealth", keywords="ecological momentary assessment", keywords="active task", keywords="ResearchKit", abstract="Background: The classic Marshmallow Test, where children were offered a choice between one small but immediate reward (eg, one marshmallow) or a larger reward (eg, two marshmallows) if they waited for a period of time, instigated a wealth of research on the relationships among impulsive responding, self-regulation, and clinical and life outcomes. Impulsivity is a hallmark feature of self-regulation failures that lead to poor health decisions and outcomes, making understanding and treating impulsivity one of the most important constructs to tackle in building a culture of health. 
Despite a large literature base, impulsivity measurement remains difficult due to the multidimensional nature of the construct and limited methods of assessment in daily life. Mobile devices and the rise of mobile health (mHealth) have changed our ability to assess and intervene with individuals remotely, providing an avenue for ambulatory diagnostic testing and interventions. Longitudinal studies with mobile devices can further help to understand impulsive behaviors and variation in state impulsivity in daily life. Objective: The aim of this study was to develop and validate an impulsivity mHealth diagnostics and monitoring app called Digital Marshmallow Test (DMT) using both the Apple and Android platforms for widespread dissemination to researchers, clinicians, and the general public. Methods: The DMT app was developed using Apple's ResearchKit (iOS) and Android's ResearchStack open source frameworks for developing health research study apps. The DMT app consists of three main modules: self-report, ecological momentary assessment, and active behavioral and cognitive tasks. We conducted a study with a 21-day assessment period (N=116 participants) to validate the novel measures of the DMT app. Results: We used a semantic differential scale to develop self-report trait and momentary state measures of impulsivity as part of the DMT app. We identified three state factors (inefficient, thrill seeking, and intentional) that correlated highly with established measures of impulsivity. We further leveraged momentary semantic differential questions to examine intraindividual variability, the effect of daily life, and the contextual effect of mood on state impulsivity and daily impulsive behaviors. Our results indicated validation of the self-report semantic differential and related results, and of the mobile behavioral tasks, including the Balloon Analogue Risk Task and Go-No-Go task, with relatively low validity of the mobile Delay Discounting task. 
We discuss the design implications of these results to mHealth research. Conclusions: This study demonstrates the potential for assessing different facets of trait and state impulsivity during everyday life and in clinical settings using the DMT mobile app. The DMT app can be further used to enhance our understanding of the individual facets that underlie impulsive behaviors, as well as providing a promising avenue for digital interventions. Trial Registration: ClinicalTrials.gov NCT03006653; https://www.clinicaltrials.gov/ct2/show/NCT03006653 ", doi="10.2196/25018", url="http://mhealth.jmir.org/2021/1/e25018/", url="http://www.ncbi.nlm.nih.gov/pubmed/33480854" } @Article{info:doi/10.2196/25382, author="Martin-Key, A. Nayra and Schei, S. Thea and Barker, J. Eleanor and Spadaro, Benedetta and Funnell, Erin and Benacek, Jiri and Tomasik, Jakub and Bahn, Sabine", title="The Current State and Diagnostic Accuracy of Digital Mental Health Assessment Tools for Psychiatric Disorders: Protocol for a Systematic Review and Meta-analysis", journal="JMIR Res Protoc", year="2021", month="Jan", day="8", volume="10", number="1", pages="e25382", keywords="diagnostic accuracy", keywords="digital mental health", keywords="digital questionnaire", keywords="meta-analysis", keywords="psychiatry", keywords="systematic review", abstract="Background: Despite the rapidly growing number of digital assessment tools for screening and diagnosing mental health disorders, little is known about their diagnostic accuracy. Objective: The purpose of this systematic review and meta-analysis is to establish the diagnostic accuracy of question- and answer-based digital assessment tools for diagnosing a range of highly prevalent psychiatric conditions in the adult population. Methods: The Preferred Reporting Items for Systematic Review and Meta-Analysis Protocols (PRISMA-P) will be used. 
The focus of the systematic review is guided by the population, intervention, comparator, and outcome framework (PICO). We will conduct a comprehensive systematic literature search of MEDLINE, PsychINFO, Embase, Web of Science Core Collection, Cochrane Library, Applied Social Sciences Index and Abstracts (ASSIA), and Cumulative Index to Nursing and Allied Health Literature (CINAHL) for appropriate articles published from January 1, 2005. Two authors will independently screen the titles and abstracts of identified references and select studies according to the eligibility criteria. Any inconsistencies will be discussed and resolved. The two authors will then extract data into a standardized form. Risk of bias will be assessed using the Quality Assessment of Diagnostic Accuracy Studies-2 (QUADAS-2) tool, and a descriptive analysis and meta-analysis will summarize the diagnostic accuracy of the identified digital assessment tools. Results: The systematic review and meta-analysis commenced in November 2020, with findings expected by May 2021. Conclusions: This systematic review and meta-analysis will summarize the diagnostic accuracy of question- and answer-based digital assessment tools. It will identify implications for clinical practice, areas for improvement, and directions for future research. Trial Registration: PROSPERO International Prospective Register of Systematic Reviews CRD42020214724; https://www.crd.york.ac.uk/prospero/display\_record.php?ID=CRD42020214724. International Registered Report Identifier (IRRID): DERR1-10.2196/25382 ", doi="10.2196/25382", url="http://www.researchprotocols.org/2021/1/e25382/", url="http://www.ncbi.nlm.nih.gov/pubmed/33416508" } @Article{info:doi/10.2196/22637, author="Aboueid, Stephanie and Meyer, Samantha and Wallace, R. 
James and Mahajan, Shreya and Chaurasia, Ashok", title="Young Adults' Perspectives on the Use of Symptom Checkers for Self-Triage and Self-Diagnosis: Qualitative Study", journal="JMIR Public Health Surveill", year="2021", month="Jan", day="6", volume="7", number="1", pages="e22637", keywords="self-assessment", keywords="symptom checkers", keywords="self-triage", keywords="self-diagnosis", keywords="young adults", keywords="digital platforms", keywords="internet", keywords="user experience", keywords="Google search", abstract="Background: Young adults often browse the internet for self-triage and diagnosis. More sophisticated digital platforms such as symptom checkers have recently become pervasive; however, little is known about their use. Objective: The aim of this study was to understand young adults' (18-34 years old) perspectives on the use of the Google search engine versus a symptom checker, as well as to identify the barriers and enablers for using a symptom checker for self-triage and self-diagnosis. Methods: A qualitative descriptive case study research design was used. Semistructured interviews were conducted with 24 young adults enrolled in a university in Ontario, Canada. All participants were given a clinical vignette and were asked to use a symptom checker (WebMD Symptom Checker or Babylon Health) while thinking out loud, and were asked questions regarding their experience. Interviews were audio-recorded, transcribed, and imported into the NVivo software program. Inductive thematic analysis was conducted independently by two researchers. Results: Using the Google search engine was perceived to be faster and more customizable (ie, ability to enter symptoms freely in the search engine) than a symptom checker; however, a symptom checker was perceived to be useful for a more personalized assessment. 
After having used a symptom checker, most of the participants believed that the platform needed improvement in the areas of accuracy, security and privacy, and medical jargon used. Given these limitations, most participants believed that symptom checkers could be more useful for self-triage than for self-diagnosis. Interestingly, more than half of the participants were not aware of symptom checkers prior to this study and most believed that this lack of awareness about the existence of symptom checkers hindered their use. Conclusions: Awareness related to the existence of symptom checkers and their integration into the health care system are required to maximize benefits related to these platforms. Addressing the barriers identified in this study is likely to increase the acceptance and use of symptom checkers by young adults. ", doi="10.2196/22637", url="https://publichealth.jmir.org/2021/1/e22637", url="http://www.ncbi.nlm.nih.gov/pubmed/33404515" } @Article{info:doi/10.2196/17332, author="Chan, C. Joyce Y. and Wong, Adrian and Yiu, Brian and Mok, Hazel and Lam, Patti and Kwan, Pauline and Chan, Amany and Mok, T. Vincent C. and Tsoi, F. Kelvin K. and Kwok, Y. Timothy C.", title="Electronic Cognitive Screen Technology for Screening Older Adults With Dementia and Mild Cognitive Impairment in a Community Setting: Development and Validation Study", journal="J Med Internet Res", year="2020", month="Dec", day="18", volume="22", number="12", pages="e17332", keywords="EC-Screen", keywords="cognitive screening", keywords="dementia", keywords="mild cognitive impairment", abstract="Background: A digital cognitive test can be a useful and quick tool for the screening of cognitive impairment. Previous studies have shown that the diagnostic performance of digital cognitive tests is comparable with that of conventional paper-and-pencil tests. 
However, the use of commercially available digital cognitive tests is not common in Hong Kong, which may be due to the high cost of the tests and the language barrier. Thus, we developed a brief and user-friendly digital cognitive test called the Electronic Cognitive Screen (EC-Screen) for the detection of mild cognitive impairment (MCI) and dementia of older adults. Objective: The aim of this study was to evaluate the performance of the EC-Screen for the detection of MCI and dementia in older adults. Methods: The EC-Screen is a brief digital cognitive test that has been adapted from the Rapid Cognitive Screen test. The EC-Screen uses a cloud-based platform and runs on a tablet. Participants with MCI, dementia, and cognitively healthy controls were recruited from research clinics and the community. The outcomes were the performance of the EC-Screen in distinguishing participants with MCI and dementia from controls, and in distinguishing participants with dementia from those with MCI and controls. The cohort was randomly split into derivation and validation cohorts based on the participants' disease group. In the derivation cohort, the regression-derived score of the EC-Screen was calculated using binomial logistic regression. Two predictive models were produced. The first model was used to distinguish participants with MCI and dementia from controls, and the second model was used to distinguish participants with dementia from those with MCI and controls. Receiver operating characteristic curves were constructed and the areas under the curves (AUCs) were calculated. The performances of the two predictive models were tested using the validation cohorts. The relationship between the EC-Screen and paper-and-pencil Montreal Cognitive Assessment-Hong Kong version (HK-MoCA) was evaluated by the Pearson correlation coefficient. Results: A total of 126 controls, 54 participants with MCI, and 63 participants with dementia were included in the study. 
In differentiating participants with MCI and dementia from controls, the AUC of the EC-Screen in the derivation and validation cohorts was 0.87 and 0.84, respectively. The optimal sensitivity and specificity in the derivation cohorts were 0.81 and 0.80, respectively. In differentiating participants with dementia from those with MCI and controls, the AUC of the derivation and validation cohorts was 0.90 and 0.88, respectively. The optimal sensitivity and specificity in the derivation cohort were 0.83 and 0.83, respectively. There was a significant correlation between the EC-Screen and HK-MoCA (r=--0.67, P<.001). Conclusions: The EC-Screen is suggested to be a promising tool for the detection of MCI and dementia. This test can be self-administered or assisted by a nonprofessional staff or family member. Therefore, the EC-Screen can be a useful tool for case finding in primary health care and community settings. ", doi="10.2196/17332", url="http://www.jmir.org/2020/12/e17332/", url="http://www.ncbi.nlm.nih.gov/pubmed/33337341" } @Article{info:doi/10.2196/22634, author="Minaeva, Olga and Riese, Harri{\"e}tte and Lamers, Femke and Antypa, Niki and Wichers, Marieke and Booij, H. Sanne", title="Screening for Depression in Daily Life: Development and External Validation of a Prediction Model Based on Actigraphy and Experience Sampling Method", journal="J Med Internet Res", year="2020", month="Dec", day="1", volume="22", number="12", pages="e22634", keywords="actigraphy", keywords="activity tracker", keywords="depression", keywords="experience sampling method", keywords="prediction model", keywords="screening", abstract="Background: In many countries, depressed individuals often first visit primary care settings for consultation, but a considerable number of clinically depressed patients remain unidentified. Introducing additional screening tools may facilitate the diagnostic process. 
Objective: This study aimed to examine whether experience sampling method (ESM)-based measures of depressive affect and behaviors can discriminate depressed from nondepressed individuals. In addition, the added value of actigraphy-based measures was examined. Methods: We used data from 2 samples to develop and validate prediction models. The development data set included 14 days of ESM and continuous actigraphy of currently depressed (n=43) and nondepressed individuals (n=82). The validation data set included 30 days of ESM and continuous actigraphy of currently depressed (n=27) and nondepressed individuals (n=27). Backward stepwise logistic regression analysis was applied to build the prediction models. Performance of the models was assessed with goodness-of-fit indices, calibration curves, and discriminative ability (area under the receiver operating characteristic curve [AUC]). Results: In the development data set, the discriminative ability was good for the actigraphy model (AUC=0.790) and excellent for both the ESM (AUC=0.991) and the combined-domains model (AUC=0.993). In the validation data set, the discriminative ability was reasonable for the actigraphy model (AUC=0.648) and excellent for both the ESM (AUC=0.891) and the combined-domains model (AUC=0.892). Conclusions: ESM is a good diagnostic predictor and is easy to calculate, and it therefore holds promise for implementation in clinical practice. Actigraphy shows no added value to ESM as a diagnostic predictor but might still be useful when ESM use is restricted. ", doi="10.2196/22634", url="https://www.jmir.org/2020/12/e22634", url="http://www.ncbi.nlm.nih.gov/pubmed/33258783" } @Article{info:doi/10.2196/24169, author="BinDhim, F. Nasser and Althumiri, A. Nora and Basyouni, H. Mada and Sims, T. Omar and Alhusseini, Noara and Alqahtani, A. 
Saleh", title="Arabic Translation of the Weight Self-Stigma Questionnaire: Instrument Validation Study of Factor Structure and Reliability", journal="JMIR Form Res", year="2020", month="Nov", day="13", volume="4", number="11", pages="e24169", keywords="overweight", keywords="stigma", keywords="weight self-stigma", keywords="Weight Self-Stigma Questionnaire", keywords="obesity", keywords="Saudi Arabia", keywords="questionnaire", keywords="validation", keywords="reliability", keywords="validity", abstract="Background: While it is most often associated with its effects on physical health, obesity is also associated with serious self-stigmatization. The lack of a suitable, validated tool to measure weight-related self-stigma in Arabic countries may be partly responsible for the scarcity of literature about this problem. Objective: This study investigated the reliability and validity of an Arabic version of the Weight Self-Stigma Questionnaire (WSSQ). Methods: Data on the Arabic-translated version of the 12-item WSSQ were collected using two cross-sectional electronic questionnaires distributed among Saudi nationals through the Sharik Association for Health Research's database in June 2020. Internal consistency, test-retest reliability, and exploratory factor analysis of the Arabic WSSQ were assessed and compared with the original English version and other translations. Results: For reliability analysis, 43 participants completed the Arabic WSSQ during two time periods. Internal consistency was $\alpha$=.898 for the overall survey, $\alpha$=.819 for the fear of enacted stigma subscale (factor 1), and $\alpha$=.847 for the self-devaluation subscale (factor 2). The test-retest reliability of the intraclass correlation coefficient was $\alpha$=.982. In the factor structure analysis, 295 participants completed the questionnaire. 
The Arabic WSSQ loading of the items was consistent with the original WSSQ, except for the loading of item 9, which was stronger in factor 2 than in factor 1. The two factors accounted for the observed variances of 47.7\% and 10.6\%. Conclusions: The Arabic version of the WSSQ has good internal consistency and reliability, and the factorial structure is similar to that of the original WSSQ. The Arabic WSSQ is adaptable for clinicians seeking to assess weight-related self-stigma in Arabic-speaking people. ", doi="10.2196/24169", url="http://formative.jmir.org/2020/11/e24169/", url="http://www.ncbi.nlm.nih.gov/pubmed/33185558" } @Article{info:doi/10.2196/20976, author="Thabrew, Hiran and Kumar, Harshali and Goldfinch, Mary and Cavadino, Alana and Goodyear-Smith, Felicity", title="Repeated Psychosocial Screening of High School Students Using YouthCHAT: Cohort Study", journal="JMIR Pediatr Parent", year="2020", month="Oct", day="26", volume="3", number="2", pages="e20976", keywords="mass screening", keywords="mental health", keywords="school health services", keywords="eHealth", abstract="Background: Psychosocial problems are common during adolescence and can have long-lasting effects on health and on academic and social functioning. YouthCHAT, an electronic HEEADSSS (home, education, eating, activities, drugs and alcohol, suicide and depression, sexuality and safety)-aligned instrument, has recently been demonstrated to be an acceptable and effective school-based psychosocial screener for 13-year-old (Year 9) high school students. Objective: This study aims to compare acceptability and detection rates with repeated YouthCHAT screenings of high school students when they are 13 years old (Year 9) and 14 years old (Year 10). Methods: We invited all Year-10 students to complete a YouthCHAT screening in 2018. Rates of positively identified issues were compared between the subset of students screened in both 2017 and 2018. 
Student acceptability toward YouthCHAT was investigated through focus group sessions. Onward clinical referral rates in 2018 were also investigated to explore the potential referral burden following screening. Data analysis for rates of positively identified issues were conducted with the McNemar test. Chi-square, Fisher exact test, and Kruskal-Wallis test were used to analyze the focus group data. Results: Of 141 eligible Year-10 students, 114 (81\%) completed a YouthCHAT screening during 2018, and 97 (85\%) of them completed it for a second time. Apart from depression, which increased (P=.002), and perceived life stress, which decreased (P=.04), rates of identified issues were broadly similar between 13 and 14 years of age. Repeated screenings via YouthCHAT was acceptable to students and time-efficient (mean, 6 minutes and 32 seconds) but did not reduce the overall number of individuals with identified issues. Onward clinical referrals from positive screens were mostly managed by school-based health services without the need for external referrals. Conclusions: Although further evaluation is needed, our results support the value of YouthCHAT as an acceptable and effective instrument with which to achieve routine identification of psychosocial issues and early intervention within a high school environment. 
", doi="10.2196/20976", url="http://pediatrics.jmir.org/2020/2/e20976/", url="http://www.ncbi.nlm.nih.gov/pubmed/33104007" } @Article{info:doi/10.2196/20126, author="Dui, Greta Linda and Lunardini, Francesca and Termine, Cristiano and Matteucci, Matteo and Stucchi, Adolfo Natale and Borghese, Alberto Nunzio and Ferrante, Simona", title="A Tablet App for Handwriting Skill Screening at the Preliteracy Stage: Instrument Validation Study", journal="JMIR Serious Games", year="2020", month="Oct", day="22", volume="8", number="4", pages="e20126", keywords="serious game", keywords="tablet", keywords="isochrony", keywords="homothety", keywords="speed-accuracy tradeoff", keywords="steering law", keywords="writing", keywords="prevention", abstract="Background: Difficulties in handwriting, such as dysgraphia, impact several aspects of a child's everyday life. Current methodologies for the detection of such difficulties in children have the following three main weaknesses: (1) they are prone to subjective evaluation; (2) they can be administered only when handwriting is mastered, thus delaying the diagnosis and the possible adoption of countermeasures; and (3) they are not always easily accessible to the entire community. Objective: This work aims at developing a solution able to: (1) quantitatively measure handwriting features whose alteration is typically seen in children with dysgraphia; (2) enable their study in a preliteracy population; and (3) leverage a standard consumer technology to increase the accessibility of both early screening and longitudinal monitoring of handwriting difficulties. Methods: We designed and developed a novel tablet-based app Play Draw Write to assess potential markers of dysgraphia through the quantification of the following three key handwriting laws: isochrony, homothety, and speed-accuracy tradeoff. To extend such an approach to a preliteracy age, the app includes the study of the laws in terms of both word writing and symbol drawing. 
The app was tested among healthy children with mastered handwriting (third graders) and those at a preliterate age (kindergartners). Results: App testing in 15 primary school children confirmed that the three laws hold on the tablet surface when both writing words and drawing symbols. We found significant speed modulation according to size (P<.001), no relevant changes to fraction time for 67 out of 70 comparisons, and significant regression between movement time and index of difficulty for 44 out of 45 comparisons (P<.05, $R^2$>0.28, 12 degrees of freedom). Importantly, the three laws were verified on symbols among 19 kindergartners. Results from the speed-accuracy exercise showed a significant evolution with age of the global movement time (circle: P=.003, square: P<.001, word: P=.001), the goodness of fit of the regression between movement time and accuracy constraints (square: P<.001, circle: P=.02), and the index of performance (square: P<.001). Our findings show that homothety, isochrony, and speed-accuracy tradeoff principles are present in children even before handwriting acquisition; however, some handwriting-related skills are partially refined with age. Conclusions: The designed app represents a promising solution for the screening of handwriting difficulties, since it allows (1) anticipation of the detection of alteration of handwriting principles at a preliteracy age and (2) provision of broader access to the monitoring of handwriting principles. Such a solution potentially enables the selective strengthening of lacking abilities before they exacerbate and affect the child's whole life. ", doi="10.2196/20126", url="http://games.jmir.org/2020/4/e20126/", url="http://www.ncbi.nlm.nih.gov/pubmed/33090110" } @Article{info:doi/10.2196/19716, author="van Herpen, Marjolein Merel and Boeschoten, A. 
Manon and te Brake, Hans and van der Aa, Niels and Olff, Miranda", title="Mobile Insight in Risk, Resilience, and Online Referral (MIRROR): Psychometric Evaluation of an Online Self-Help Test", journal="J Med Internet Res", year="2020", month="Sep", day="25", volume="22", number="9", pages="e19716", keywords="potentially traumatic events", keywords="mobile mental health", keywords="self-help", keywords="online", keywords="resilience", keywords="posttraumatic stress disorder", abstract="Background: Most people who experience a potentially traumatic event (PTE) recover on their own. A small group of individuals develops psychological complaints, but this is often not detected in time or guidance to care is suboptimal. To identify these individuals and encourage them to seek help, a web-based self-help test called Mobile Insight in Risk, Resilience, and Online Referral (MIRROR) was developed. MIRROR takes an innovative approach since it integrates both negative and positive outcomes of PTEs and time since the event and provides direct feedback to the user. Objective: The goal of this study was to assess MIRROR's use, examine its psychometric properties (factor structure, internal consistency, and convergent and divergent validity), and evaluate how well it classifies respondents into different outcome categories compared with reference measures. Methods: MIRROR was embedded in the website of Victim Support Netherlands so visitors could use it. We compared MIRROR's outcomes to reference measures of PTSD symptoms (PTSD Checklist for DSM-5), depression, anxiety, stress (Depression Anxiety Stress Scale--21), psychological resilience (Resilience Evaluation Scale), and positive mental health (Mental Health Continuum Short Form). Results: In 6 months, 1112 respondents completed MIRROR, of whom 663 also completed the reference measures. 
Results showed good internal consistency (interitem correlations range .24 to .55, corrected item-total correlations range .30 to .54, and Cronbach alpha coefficient range .62 to .68), and convergent and divergent validity (Pearson correlations range --.259 to .665). Exploratory and confirmatory factor analyses (EFA+CFA) yielded a 2-factor model with good model fit (CFA model fit indices: $\chi^2_{19}$=107.8, P<.001, CFI=.965, TLI=.948, RMSEA=.065), conceptual meaning, and parsimony. MIRROR correctly classified respondents into different outcome categories compared with the reference measures. Conclusions: MIRROR is a valid and reliable self-help test to identify negative (PTSD complaints) and positive outcomes (psychosocial functioning and resilience) of PTEs. MIRROR is an easily accessible online tool that can help people who have experienced a PTE to timely identify psychological complaints and find appropriate support, a tool that might be highly needed in times like the coronavirus pandemic. ", doi="10.2196/19716", url="http://www.jmir.org/2020/9/e19716/", url="http://www.ncbi.nlm.nih.gov/pubmed/32975521" } @Article{info:doi/10.2196/18234, author="Middleton, M. Rod and Pearson, R. Owen and Ingram, Gillian and Craig, M. Elaine and Rodgers, J. William and Downing-Wood, Hannah and Hill, Joseph and Tuite-Dalton, Katherine and Roberts, Christopher and Watson, Lynne and Ford, V.
David and Nicholas, Richard", title="A Rapid Electronic Cognitive Assessment Measure for Multiple Sclerosis: Validation of Cognitive Reaction, an Electronic Version of the Symbol Digit Modalities Test", journal="J Med Internet Res", year="2020", month="Sep", day="23", volume="22", number="9", pages="e18234", keywords="cognition", keywords="multiple sclerosis", keywords="eHealth", keywords="electronic assessment", keywords="patient reported outcomes", keywords="neurology", abstract="Background: Incorporating cognitive testing into routine clinical practice is a challenge in multiple sclerosis (MS), given the wide spectrum of both cognitive and physical impairments people can have and the time that testing requires. Shortened paper and verbal assessments predominate but still are not used routinely. Computer-based tests are becoming more widespread; however, changes in how a paper test is implemented can impact what exactly is being assessed in an individual. The Symbol Digit Modalities Test (SDMT) is one validated test that forms part of the cognitive batteries used in MS and has some computer-based versions. We developed a tablet-based SDMT variant that has the potential to be ultimately deployed to patients' own devices. Objective: This paper aims to develop, validate, and deploy a computer-based SDMT variant, the Cognition Reaction (CoRe) test, that can reliably replicate the characteristics of the paper-based SDMT. Methods: We carried out analysis using Pearson and intraclass correlations, as well as a Bland-Altman comparison, to examine consistency between the SDMT and CoRe tests and for test-retest reliability. The SDMT and CoRe tests were evaluated for sensitivity to disability levels and age. A novel metric in CoRe was found: question answering velocity could be calculated. This was evaluated in relation to disability levels and age for people with MS and compared with a group of healthy control volunteers.
Results: SDMT and CoRe test scores were highly correlated and consistent with 1-month retest values. Lower scores were seen in patients with higher age and some effect was seen with increasing disability. There was no learning effect evident. Question answering velocity demonstrated a small increase in speed over the 90-second duration of the test in people with MS and healthy controls. Conclusions: This study validates a computer-based alternative to the SDMT that can be used in clinics and beyond. It enables accurate recording of elements of cognition relevant in MS but offers additional metrics that may offer further value to clinicians and people with MS. ", doi="10.2196/18234", url="http://www.jmir.org/2020/9/e18234/", url="http://www.ncbi.nlm.nih.gov/pubmed/32965240" } @Article{info:doi/10.2196/17963, author="Lunardini, Francesca and Luperto, Matteo and Romeo, Marta and Basilico, Nicola and Daniele, Katia and Azzolino, Domenico and Damanti, Sarah and Abbate, Carlo and Mari, Daniela and Cesari, Matteo and Borghese, Alberto Nunzio and Ferrante, Simona", title="Supervised Digital Neuropsychological Tests for Cognitive Decline in Older Adults: Usability and Clinical Validity Study", journal="JMIR Mhealth Uhealth", year="2020", month="Sep", day="21", volume="8", number="9", pages="e17963", keywords="aging", keywords="Bells Test", keywords="computerized testing", keywords="dementia", keywords="early diagnosis", keywords="eHealth", keywords="mild cognitive impairment", keywords="neuropsychological assessment", keywords="Trail Making Test", abstract="Background: Dementia is a major and growing health problem, and early diagnosis is key to its management. Objective: With the ultimate goal of providing a monitoring tool that could be used to support the screening for cognitive decline, this study aims to develop a supervised, digitized version of 2 neuropsychological tests: Trail Making Test and Bells Test. 
The system consists of a web app that implements a tablet-based version of the tests and consists of an innovative vocal assistant that acts as the virtual supervisor for the execution of the test. A replay functionality is added to allow inspection of the user's performance after test completion. Methods: To deploy the system in a nonsupervised environment, extensive functional testing of the platform was conducted, together with a validation of the tablet-based tests. Such validation had the two-fold aim of evaluating system usability and acceptance and investigating the concurrent validity of computerized assessment compared with the corresponding paper-and-pencil counterparts. Results: The results obtained from 83 older adults showed high system acceptance, despite the patients' low familiarity with technology. The system software was successfully validated. A concurrent validation of the system reported good ability of the digitized tests to retain the same predictive power of the corresponding paper-based tests. Conclusions: Altogether, the positive results pave the way for the deployment of the system to a nonsupervised environment, thus representing a potential efficacious and ecological solution to support clinicians in the identification of early signs of cognitive decline. 
", doi="10.2196/17963", url="http://mhealth.jmir.org/2020/9/e17963/", url="http://www.ncbi.nlm.nih.gov/pubmed/32955442" } @Article{info:doi/10.2196/21922, author="Yom-Tov, Elad and Cherlow, Yuval", title="Ethical Challenges and Opportunities Associated With the Ability to Perform Medical Screening From Interactions With Search Engines: Viewpoint", journal="J Med Internet Res", year="2020", month="Sep", day="16", volume="22", number="9", pages="e21922", keywords="search engines", keywords="diagnosis", keywords="screening", doi="10.2196/21922", url="http://www.jmir.org/2020/9/e21922/", url="http://www.ncbi.nlm.nih.gov/pubmed/32936082" } @Article{info:doi/10.2196/20581, author="Sin, Jacqueline and Galeazzi, Gian and McGregor, Elicia and Collom, Jennifer and Taylor, Anna and Barrett, Barbara and Lawrence, Vanessa and Henderson, Claire", title="Digital Interventions for Screening and Treating Common Mental Disorders or Symptoms of Common Mental Illness in Adults: Systematic Review and Meta-analysis", journal="J Med Internet Res", year="2020", month="Sep", day="2", volume="22", number="9", pages="e20581", keywords="eHealth", keywords="mHealth", keywords="psychiatric illness", keywords="mental disorders", keywords="common mental illness", keywords="depression", keywords="anxiety", keywords="self-care", abstract="Background: Digital interventions targeting common mental disorders (CMDs) or symptoms of CMDs are growing rapidly and gaining popularity, probably in response to the increased prevalence of CMDs and better awareness of early help-seeking and self-care. However, no previous systematic reviews that focus on these novel interventions were found. Objective: This systematic review aims to scope entirely web-based interventions that provided screening and signposting for treatment, including self-management strategies, for people with CMDs or subthreshold symptoms. 
In addition, a meta-analysis was conducted to evaluate the effectiveness of these interventions for mental well-being and mental health outcomes. Methods: Ten electronic databases including MEDLINE, PsycINFO, and EMBASE were searched from January 1, 1999, to early April 2020. We included randomized controlled trials (RCTs) that evaluated a digital intervention (1) targeting adults with symptoms of CMDs, (2) providing both screening and signposting to other resources including self-care, and (3) delivered entirely through the internet. Intervention characteristics including target population, platform used, key design features, and outcome measure results were extracted and compared. Trial outcome results were included in a meta-analysis on the effectiveness of users' well-being and mental health outcomes. We also rated the meta-analysis results with the Grading of Recommendations, Assessment, Development, and Evaluations approach to establish the quality of the evidence. Results: The electronic searches yielded 21 papers describing 16 discrete digital interventions. These interventions were investigated in 19 unique trials including 1 (5\%) health economic study. Most studies were conducted in Australia and North America. The targeted populations varied from the general population to allied health professionals. All interventions offered algorithm-driven screening with measures to assess symptom levels and to assign treatment options including automatic web-based psychoeducation, self-care strategies, and signposting to existing services. 
A meta-analysis of usable trial data showed that digital interventions improved well-being (3 randomized controlled trials [RCTs]; n=1307; standardized mean difference [SMD] 0.40; 95\% CI 0.29 to 0.51; $I^2$=28\%; fixed effect), symptoms of mental illness (6 RCTs; n=992; SMD --0.29; 95\% CI --0.49 to --0.09; $I^2$=51\%; random effects), and work and social functioning (3 RCTs; n=795; SMD --0.16; 95\% CI --0.30 to --0.02; $I^2$=0\%; fixed effect) compared with waitlist or attention control. However, some follow-up data failed to show any sustained effects beyond the post intervention time point. Data on mechanisms of change and cost-effectiveness were also lacking, precluding further analysis. Conclusions: Digital mental health interventions to assess and signpost people experiencing symptoms of CMDs appear to be acceptable to a sufficient number of people and appear to have enough evidence for effectiveness to warrant further study. We recommend that future studies incorporate economic analysis and process evaluation to assess the mechanisms of action and cost-effectiveness to aid scaling of the implementation. ", doi="10.2196/20581", url="https://www.jmir.org/2020/9/e20581", url="http://www.ncbi.nlm.nih.gov/pubmed/32876577" } @Article{info:doi/10.2196/19348, author="Birnbaum, Leo Michael and Kulkarni, ``Param'' Prathamesh and Van Meter, Anna and Chen, Victor and Rizvi, F. Asra and Arenare, Elizabeth and De Choudhury, Munmun and Kane, M.
John", title="Utilizing Machine Learning on Internet Search Activity to Support the Diagnostic Process and Relapse Detection in Young Individuals With Early Psychosis: Feasibility Study", journal="JMIR Ment Health", year="2020", month="Sep", day="1", volume="7", number="9", pages="e19348", keywords="schizophrenia spectrum disorders", keywords="internet search activity", keywords="Google", keywords="diagnostic prediction", keywords="relapse prediction", keywords="machine learning", keywords="digital data", keywords="digital phenotyping", keywords="digital biomarkers", abstract="Background: Psychiatry is nearly entirely reliant on patient self-reporting, and there are few objective and reliable tests or sources of collateral information available to help diagnostic and assessment procedures. Technology offers opportunities to collect objective digital data to complement patient experience and facilitate more informed treatment decisions. Objective: We aimed to develop computational algorithms based on internet search activity designed to support diagnostic procedures and relapse identification in individuals with schizophrenia spectrum disorders. Methods: We extracted 32,733 time-stamped search queries across 42 participants with schizophrenia spectrum disorders and 74 healthy volunteers between the ages of 15 and 35 (mean 24.4 years, 44.0\% male), and built machine-learning diagnostic and relapse classifiers utilizing the timing, frequency, and content of online search activity. Results: Classifiers predicted a diagnosis of schizophrenia spectrum disorders with an area under the curve value of 0.74 and predicted a psychotic relapse in individuals with schizophrenia spectrum disorders with an area under the curve of 0.71. Compared with healthy participants, those with schizophrenia spectrum disorders made fewer searches and their searches consisted of fewer words. 
Prior to a relapse hospitalization, participants with schizophrenia spectrum disorders were more likely to use words related to hearing, perception, and anger, and were less likely to use words related to health. Conclusions: Online search activity holds promise for gathering objective and easily accessed indicators of psychiatric symptoms. Utilizing search activity as collateral behavioral health information would represent a major advancement in efforts to capitalize on objective digital data to improve mental health monitoring. ", doi="10.2196/19348", url="https://mental.jmir.org/2020/9/e19348", url="http://www.ncbi.nlm.nih.gov/pubmed/32870161" } @Article{info:doi/10.2196/19962, author="Adler, A. Daniel and Ben-Zeev, Dror and Tseng, W-S Vincent and Kane, M. John and Brian, Rachel and Campbell, T. Andrew and Hauser, Marta and Scherer, A. Emily and Choudhury, Tanzeem", title="Predicting Early Warning Signs of Psychotic Relapse From Passive Sensing Data: An Approach Using Encoder-Decoder Neural Networks", journal="JMIR Mhealth Uhealth", year="2020", month="Aug", day="31", volume="8", number="8", pages="e19962", keywords="psychotic disorders", keywords="schizophrenia", keywords="mHealth", keywords="mental health", keywords="mobile health", keywords="smartphone applications", keywords="machine learning", keywords="passive sensing", keywords="digital biomarkers", keywords="digital phenotyping", keywords="artificial intelligence", keywords="deep learning", keywords="mobile phone", abstract="Background: Schizophrenia spectrum disorders (SSDs) are chronic conditions, but the severity of symptomatic experiences and functional impairments vacillate over the course of illness. Developing unobtrusive remote monitoring systems to detect early warning signs of impending symptomatic relapses would allow clinicians to intervene before the patient's condition worsens. 
Objective: In this study, we aim to create the first models, exclusively using passive sensing data from a smartphone, to predict behavioral anomalies that could indicate early warning signs of a psychotic relapse. Methods: Data used to train and test the models were collected during the CrossCheck study. Hourly features derived from smartphone passive sensing data were extracted from 60 patients with SSDs (42 nonrelapse and 18 relapse >1 time throughout the study) and used to train models and test performance. We trained 2 types of encoder-decoder neural network models and a clustering-based local outlier factor model to predict behavioral anomalies that occurred within the 30-day period before a participant's date of relapse (the near relapse period). Models were trained to recreate participant behavior on days of relative health (DRH, outside of the near relapse period), following which a threshold to the recreation error was applied to predict anomalies. The neural network model architecture and the percentage of relapse participant data used to train all models were varied. Results: A total of 20,137 days of collected data were analyzed, with 726 days of data (0.037\%) within any 30-day near relapse period. The best performing model used a fully connected neural network autoencoder architecture and achieved a median sensitivity of 0.25 (IQR 0.15-1.00) and specificity of 0.88 (IQR 0.14-0.96; a median 108\% increase in behavioral anomalies near relapse). We conducted a post hoc analysis using the best performing model to identify behavioral features that had a medium-to-large effect (Cohen d>0.5) in distinguishing anomalies near relapse from DRH among 4 participants who relapsed multiple times throughout the study. Qualitative validation using clinical notes collected during the original CrossCheck study showed that the identified features from our analysis were presented to clinicians during relapse events. 
Conclusions: Our proposed method predicted a higher rate of anomalies in patients with SSDs within the 30-day near relapse period and can be used to uncover individual-level behaviors that change before relapse. This approach will enable technologists and clinicians to build unobtrusive digital mental health tools that can predict incipient relapse in SSDs. ", doi="10.2196/19962", url="https://mhealth.jmir.org/2020/8/e19962", url="http://www.ncbi.nlm.nih.gov/pubmed/32865506" } @Article{info:doi/10.2196/18136, author="Kim, Woon Ko and Lee, Yun Sung and Choi, Jongdoo and Chin, Juhee and Lee, Hwa Byung and Na, L. Duk and Choi, Hyun Jee", title="A Comprehensive Evaluation of the Process of Copying a Complex Figure in Early- and Late-Onset Alzheimer Disease: A Quantitative Analysis of Digital Pen Data", journal="J Med Internet Res", year="2020", month="Aug", day="12", volume="22", number="8", pages="e18136", keywords="alzheimer disease", keywords="Rey-Osterrieth Complex Figure", keywords="digital biomarkers", keywords="copying process", abstract="Background: The Rey-Osterrieth Complex Figure Test (RCFT) is a neuropsychological test that is widely used to assess visual memory and visuoconstructional deficits in patients with cognitive impairment, including Alzheimer disease (AD). Patients with AD have an increased tendency for exhibiting extraordinary behaviors in the RCFT for selecting the drawing area, organizing the figure, and deciding the order of images, among other activities. However, the conventional scoring system based on pen and paper has a limited ability to reflect these detailed behaviors. Objective: This study aims to establish a scoring system that addresses not only the spatial arrangement of the finished drawing but also the drawing process of patients with AD by using digital pen data. Methods: A digital pen and tablet were used to copy complex figures. 
The stroke patterns and kinetics of normal controls (NCs) and patients with early-onset AD (EOAD) and late-onset AD (LOAD) were analyzed by comparing the pen tip trajectory, spatial arrangement, and similarity of the finished drawings. Results: Patients with AD copied the figure in a more fragmented way with a longer pause than NCs (EOAD: P=.045; LOAD: P=.01). Patients with AD showed an increased tendency to draw the figures closer toward the target image in comparison with the NCs (EOAD: P=.005; LOAD: P=.01). Patients with AD showed lower accuracy than NCs (EOAD: P=.004; LOAD: P=.002). Patients with EOAD and LOAD showed similar but slightly different drawing behaviors, especially in space use and in the initial stage of drawing. Conclusions: The digitalized complex figure test evaluated copying performance quantitatively and further elucidated the patients' ongoing process during copying. We believe that this novel approach can be used as a digital biomarker of AD. In addition, the repeatability of the test will delineate the process of executive functions and constructional organization abilities with disease progression. ", doi="10.2196/18136", url="https://www.jmir.org/2020/8/e18136", url="http://www.ncbi.nlm.nih.gov/pubmed/32491988" } @Article{info:doi/10.2196/18392, author="DeForte, Shelly and Huang, Yungui and Bourgeois, Tran and Hussain, Syed-Amad and Lin, Simon", title="The Association Between App-Administered Depression Assessments and Suicidal Ideation in User Comments: Retrospective Observational Study", journal="JMIR Mhealth Uhealth", year="2020", month="Aug", day="4", volume="8", number="8", pages="e18392", keywords="mobile health", keywords="mHealth", keywords="depression", keywords="qualitative research", keywords="mental health", abstract="Background: Many people use apps to help understand and manage their depression symptoms.
App-administered questionnaires for the symptoms of depression, such as the Patient Health Questionnaire-9, are easy to score and implement in an app, but may not be accompanied by essential resources and access needed to provide proper support and avoid potential harm. Objective: Our primary goal was to evaluate the differences in risks and helpfulness associated with using an app to self-diagnose depression, comparing assessment-only apps with multifeatured apps. We also investigated whether, what, and how additional app features may mitigate potential risks. Methods: In this retrospective observational study, we identified apps in the Google Play store that provided a depression assessment as a feature and had at least five user comments. We separated apps into two categories based on those having only a depression assessment versus those that offered additional supportive features. We conducted theoretical thematic analyses over the user reviews, with thematic coding indicating the helpfulness of the app, the presence of suicidal ideation, and how and why the apps were used. We compared the results across the two categories of apps and analyzed the differences using chi-square statistical tests. Results: We evaluated 6 apps; 3 provided only a depression assessment (assessment only), and 3 provided features in addition to self-assessment (multifeatured). User comments for assessment-only apps indicated significantly more suicidal ideation or self-harm (n=31, 9.4\%) compared to comments for multifeatured apps (n=48, 2.3\%; $\chi^2_1$=43.88, P<.001). Users of multifeatured apps were over three times more likely than assessment-only app users to comment in favor of the app's helpfulness, likely due to features like mood tracking, journaling, and informational resources (n=56, 17\% vs n=1223, 59\% respectively; $\chi^2_1$=200.36, P<.001).
The number of users under the age of 18 years was significantly higher among assessment-only app users (n=40, 12\%) than multifeatured app users (n=9, 0.04\%; $\chi^2_1$=189.09, P<.001). Conclusions: Apps that diagnose depression by self-assessment without context or other supportive features are more likely to be used by those under 18 years of age and more likely to be associated with increased user distress and potential harm. Depression self-assessments in apps should be implemented with caution and accompanied by evidence-based capabilities that establish proper context, increase self-empowerment, and encourage users to seek clinical diagnostics and outside help. ", doi="10.2196/18392", url="https://mhealth.jmir.org/2020/8/e18392", url="http://www.ncbi.nlm.nih.gov/pubmed/32663158" } @Article{info:doi/10.2196/17784, author="Obeid, S. Jihad and Dahne, Jennifer and Christensen, Sean and Howard, Samuel and Crawford, Tami and Frey, J. Lewis and Stecker, Tracy and Bunnell, E. Brian", title="Identifying and Predicting Intentional Self-Harm in Electronic Health Record Clinical Notes: Deep Learning Approach", journal="JMIR Med Inform", year="2020", month="Jul", day="30", volume="8", number="7", pages="e17784", keywords="machine learning", keywords="deep learning", keywords="suicide", keywords="suicide, attempted", keywords="electronic health records", keywords="natural language processing", abstract="Background: Suicide is an important public health concern in the United States and around the world. There has been significant work examining machine learning approaches to identify and predict intentional self-harm and suicide using existing data sets. With recent advances in computing, deep learning applications in health care are gaining momentum. Objective: This study aimed to leverage the information in clinical notes using deep neural networks (DNNs) to (1) improve the identification of patients treated for intentional self-harm and (2) predict future self-harm events.
Methods: We extracted clinical text notes from electronic health records (EHRs) of 835 patients with International Classification of Diseases (ICD) codes for intentional self-harm and 1670 matched controls who never had any intentional self-harm ICD codes. The data were divided into training and holdout test sets. We tested a number of algorithms on clinical notes associated with the intentional self-harm codes using the training set, including several traditional bag-of-words--based models and 2 DNN models: a convolutional neural network (CNN) and a long short-term memory model. We also evaluated the predictive performance of the DNNs on a subset of patients who had clinical notes 1 to 6 months before the first intentional self-harm event. Finally, we evaluated the impact of a pretrained model using Word2vec (W2V) on performance. Results: The area under the receiver operating characteristic curve (AUC) for the CNN on the phenotyping task, that is, the detection of intentional self-harm in clinical notes concurrent with the events was 0.999, with an F1 score of 0.985. In the predictive task, the CNN achieved the highest performance with an AUC of 0.882 and an F1 score of 0.769. Although pretraining with W2V shortened the DNN training time, it did not improve performance. Conclusions: The strong performance on the first task, namely, phenotyping based on clinical notes, suggests that such models could be used effectively for surveillance of intentional self-harm in clinical text in an EHR. The modest performance on the predictive task notwithstanding, the results using DNN models on clinical text alone are competitive with other reports in the literature using risk factors from structured EHR data.
", doi="10.2196/17784", url="https://medinform.jmir.org/2020/7/e17784", url="http://www.ncbi.nlm.nih.gov/pubmed/32729840" } @Article{info:doi/10.2196/16455, author="Higuchi, Masakazu and Nakamura, Mitsuteru and Shinohara, Shuji and Omiya, Yasuhiro and Takano, Takeshi and Mitsuyoshi, Shunji and Tokuno, Shinichi", title="Effectiveness of a Voice-Based Mental Health Evaluation System for Mobile Devices: Prospective Study", journal="JMIR Form Res", year="2020", month="Jul", day="20", volume="4", number="7", pages="e16455", keywords="mental health", keywords="monitoring system", keywords="stress evaluation", keywords="voice analysis", abstract="Background: We developed a system for monitoring mental health using voice data from daily phone calls, termed Mind Monitoring System (MIMOSYS), by implementing a method for estimating mental health status from voice data. Objective: The objective of this study was to evaluate the potential of this system for detecting depressive states and monitoring stress-induced mental changes. Methods: We opened our system to the public in the form of a prospective study in which data were collected over 2 years from a large, unspecified sample of users. We used these data to analyze the relationships between the rate of continued use, the men-to-women ratio, and existing psychological tests for this system over the study duration. Moreover, we analyzed changes in mental data over time under stress from particular life events. Results: The system had a high rate of continued use. Voice indicators showed that women have more depressive tendencies than men, matching the rate of depression in Japan. The system's voice indicators and the scores on classical psychological tests were correlated. We confirmed deteriorating mental health for users in areas affected by major earthquakes in Japan around the time of the earthquakes.
Conclusions: The results suggest that although this system is insufficient for detecting depression, it may be effective for monitoring changes in mental health due to stress. The greatest feature of our system is mental health monitoring, which is most effectively accomplished by performing long-term time-series analysis of the acquired data considering the user's life events. Such a system can improve the implementation of patient interventions by evaluating objective data along with life events. ", doi="10.2196/16455", url="http://formative.jmir.org/2020/7/e16455/", url="http://www.ncbi.nlm.nih.gov/pubmed/32554367" } @Article{info:doi/10.2196/15901, author="Haines-Delmont, Alina and Chahal, Gurdit and Bruen, Jane Ashley and Wall, Abbie and Khan, Tara Christina and Sadashiv, Ramesh and Fearnley, David", title="Testing Suicide Risk Prediction Algorithms Using Phone Measurements With Patients in Acute Mental Health Settings: Feasibility Study", journal="JMIR Mhealth Uhealth", year="2020", month="Jun", day="26", volume="8", number="6", pages="e15901", keywords="suicide", keywords="suicidal ideation", keywords="smartphone", keywords="cell phone", keywords="machine learning", keywords="nearest neighbor algorithm", keywords="digital phenotyping", abstract="Background: Digital phenotyping and machine learning are currently being used to augment or even replace traditional analytic procedures in many domains, including health care. Given the heavy reliance on smartphones and mobile devices around the world, this readily available source of data is an important and highly underutilized source that has the potential to improve mental health risk prediction and prevention and advance mental health globally. Objective: This study aimed to apply machine learning in an acute mental health setting for suicide risk prediction. 
This study uses a nascent approach, adding to existing knowledge by using data collected through a smartphone in place of clinical data, which have typically been collected from health care records. Methods: We created a smartphone app called Strength Within Me, which was linked to Fitbit, Apple Health kit, and Facebook, to collect salient clinical information such as sleep behavior and mood, step frequency and count, and engagement patterns with the phone from a cohort of inpatients with acute mental health (n=66). In addition, clinical research interviews were used to assess mood, sleep, and suicide risk. Multiple machine learning algorithms were tested to determine the best fit. Results: K-nearest neighbors (KNN; k=2) with uniform weighting and the Euclidean distance metric emerged as the most promising algorithm, with 68\% mean accuracy (averaged over 10,000 simulations of splitting the training and testing data via 10-fold cross-validation) and an average area under the curve of 0.65. We applied a combined 5{\texttimes}2 F test to test the model performance of KNN against the baseline classifier that guesses training majority, random forest, support vector machine and logistic regression, and achieved F statistics of 10.7 (P=.009) and 17.6 (P=.003) for training majority and random forest, respectively, rejecting the null of performance being the same. Therefore, we have taken the first steps in prototyping a system that could continuously and accurately assess the risk of suicide via mobile devices. Conclusions: Predicting for suicidality is an underaddressed area of research to which this paper makes a useful contribution. This is part of the first generation of studies to suggest that it is feasible to utilize smartphone-generated user input and passive sensor data to generate a risk algorithm among inpatients at suicide risk. 
The model reveals fair concordance between phone-derived and research-generated clinical data, and with iterative development, it has the potential for accurate discriminant risk prediction. However, although full automation and independence of clinical judgment or input would be a worthy development for those individuals who are less likely to access specialist mental health services, and for providing a timely response in a crisis situation, the ethical and legal implications of such advances in the field of psychiatry need to be acknowledged. ", doi="10.2196/15901", url="https://mhealth.jmir.org/2020/6/e15901", url="http://www.ncbi.nlm.nih.gov/pubmed/32442152" } @Article{info:doi/10.2196/12158, author="Chu, Kuo-Chung and Lu, Hsin-Ke and Huang, Ming-Chun and Lin, Shr-Jie and Liu, Wen-I and Huang, Yu-Shu and Hsu, Jen-Fu and Wang, Chih-Huan", title="Using Mobile Electroencephalography and Actigraphy to Diagnose Attention-Deficit/Hyperactivity Disorder: Case-Control Comparison Study", journal="JMIR Ment Health", year="2020", month="Jun", day="19", volume="7", number="6", pages="e12158", keywords="actigraphy", keywords="ADHD", keywords="attention deficit disorder with hyperactivity", keywords="clinical decision-making", keywords="electroencephalography", keywords="neuropsychological tests", abstract="Background: Children with attention-deficit/hyperactivity disorder (ADHD), a neurobehavioral disorder, display behaviors of inattention, hyperactivity, or impulsivity, which can affect their ability to learn and establish proper family and social relationships. Various tools are currently used by child and adolescent psychiatric clinics to diagnose, evaluate, and collect information and data. The tools allow professional physicians to assess if patients need further treatment, following a thorough and careful clinical diagnosis process. 
Objective: We aim to determine potential indicators extracted from a mobile electroencephalography (EEG) device (Mindset; NeuroSky) and an actigraph (MotionWatch 8; CamNtech) and to validate them for diagnosis of ADHD. The 3 indicators are (1) attention, measured by the EEG; (2) meditation, measured by the EEG; and (3) activity, measured by the actigraph. Methods: A total of 63 participants were recruited. The case group comprised 40 boys and 9 girls, while the control group comprised 5 boys and 9 girls. The groups were age matched. The test was divided into 3 stages---pretest, in-test, and posttest---with a testing duration of 20 minutes each. We used correlation analysis, repeated measures analysis of variance, and regression analysis to investigate which indicators can be used for ADHD diagnosis. Results: With the EEG indicators, the analysis results show a significant correlation of attention with both hit reaction time (RT) interstimulus interval (ISI) change (r=--0.368; P=.003) and hit standard error (SE) ISI change (r=--0.336; P=.007). This indicates that the higher the attention of the participants, the smaller both the hit RT change and the hit SE ISI change. With the actigraph indicator, confidence index (r=0.352; P=.005), omissions (r=0.322; P=.01), hit RT SE (r=0.393; P=.001), and variability (r=0.351; P=.005) were significant. This indicates that the higher the activity amounts, the higher the impulsive behavior of the participants and the more target omissions in the continuous performance test (CPT). The results show that the participants with ADHD present a significant difference in activity amounts (P<0.001). The actigraph outperforms the EEG in screening ADHD. Conclusions: When the participants with ADHD are stimulated under restricted conditions, they will present different amounts of activity than in unrestricted conditions due to participants' inability to exercise control over their concentration. 
This finding could be a new electronic physiological biomarker of ADHD. An actigraph can be used to detect the amount of activity exhibited and to help physicians diagnose the disorder in order to develop more objective, rapid auxiliary diagnostic tools. ", doi="10.2196/12158", url="http://mental.jmir.org/2020/6/e12158/", url="http://www.ncbi.nlm.nih.gov/pubmed/32558658" } @Article{info:doi/10.2196/16237, author="Kumar, Anupama and Wang, Michael and Riehm, Alison and Yu, Eileen and Smith, Ted and Kaplin, Adam", title="An Automated Mobile Mood Tracking Technology (Mood 24/7): Validation Study", journal="JMIR Ment Health", year="2020", month="May", day="20", volume="7", number="5", pages="e16237", keywords="depression", keywords="text messaging", keywords="patient monitoring", keywords="mobile phone", keywords="short message service", keywords="ecological momentary assessment", keywords="digital health", abstract="Background: Electronic tracking has been utilized for a variety of health conditions. Previous studies have shown that there is higher adherence to electronic methods vs paper-and-pencil tracking modalities. Electronic tracking also ensures that there are no back-filled entries, where patients have---to appear compliant---entered their responses retrospectively just before their visits with their health care provider. On the basis of the recognition of an unmet need for a Web-based automated platform to track psychiatric outcomes, Johns Hopkins University partnered with Health Central (a subsidiary of Remedy Health Media LLC) to develop Mood 24/7, an electronic, mobile, automated, SMS-based mood tracker. This is a pilot study to validate the use of Mood 24/7 in anticipation of clinical trials to demonstrate the therapeutic benefit on patients' health outcomes of utilizing digital mood-tracking technology. 
Objective: Mood 24/7 is an electronic mood-monitoring platform developed to accurately and efficiently track mood over time through automated daily SMS texts or emails. This study was designed to assess the accuracy and validity of Mood 24/7 in an outpatient psychiatric setting. Methods: This pilot study involved a retrospective chart review for depressed outpatients (N=9) to compare their self-reported Mood 24/7 daily mood ratings with their psychiatrist's independent clinical mood assessment at the time of the patient's visit. Their mood ratings via Mood 24/7 were collected over 36 weeks. In addition, a mixed model analysis was applied to compare the weekly Montgomery-{\AA}sberg Depression Rating Scale (MADRS) scores with Mood 24/7 scores over an average of 3 months. Results: A 97.2\% (315/324) digital mood reporting adherence was found over 36 weeks, and a significant correlation (r=0.86, P<.001) was observed between patients' Mood 24/7 scores and their psychiatrist's blinded clinical assessment of the patient's mood when seen in the clinic. In addition, a significant concordance (intraclass correlation of 0.69, 95\% CI 0.33-0.91, P<.001) was observed in the mixed model analysis of the clinician-administered MADRS vs Mood 24/7 scores over time. Conclusions: Our chart review and mixed model analyses demonstrate that Mood 24/7 is a valid instrument for convenient, simple, noninvasive, and accurate longitudinal mood assessment in the outpatient clinical setting. 
", doi="10.2196/16237", url="https://mental.jmir.org/2020/5/e16237", url="http://www.ncbi.nlm.nih.gov/pubmed/32432558" } @Article{info:doi/10.2196/13852, author="Tonn, Peter and Degani, Yoav and Hershko, Shani and Klein, Amit and Seule, Lea and Schulze, Nina", title="Development of a Digital Content-Free Speech Analysis Tool for the Measurement of Mental Health and Follow-Up for Mental Disorders: Protocol for a Case-Control Study", journal="JMIR Res Protoc", year="2020", month="May", day="14", volume="9", number="5", pages="e13852", keywords="voice detection", keywords="depressive disorder", keywords="content-free speech analysis", keywords="mobile health app", abstract="Background: The prevalence of mental disorders worldwide is very high. The guideline-oriented care of patients depends on early diagnosis and regular and valid evaluation of their treatment to be able to quickly intervene should the patient's mental health deteriorate. To ensure effective treatment, the level of experience of the physician or therapist is of importance, both in the initial diagnosis and in the treatment of mental illnesses. Nevertheless, experienced physicians and psychotherapists are not available in enough numbers everywhere, especially in rural areas or in less developed countries. Human speech can reveal a speaker's mental state by altering its noncontent aspects (speech melody, intonations, speech rate, etc). This is noticeable in both the clinic and everyday life by having prior knowledge of the normal speech patterns of the affected person, and with enough time spent listening to the patient. However, this time and experience are often unavailable, leaving unused opportunities to capture linguistic, noncontent information. To improve the care of patients with mental disorders, we have developed a concept for assessing their most important mental parameters through a noncontent analysis of their active speech. 
Using speech analysis for the assessment and tracking of mental health patients opens up the possibility of remote, automatic, and ongoing evaluation when used with patients' smartphones, as part of the current trends toward the increasing use of digital and mobile health tools. Objective: The primary objective of this study is to evaluate measurements of participants' mental state by comparing the analysis of noncontent speech parameters to the results of several psychological questionnaires (Symptom Checklist-90 [SCL-90], the Patient Health Questionnaire [PHQ], and the Big 5 Test). Methods: In this paper, we described a case-controlled study (with a case group and one control group). The participants will be recruited in an outpatient neuropsychiatric treatment center. Inclusion criteria are a neurological or psychiatric diagnosis made by a specialist, no terminal or life-threatening illnesses, and fluent use of the German language. Exclusion criteria include psychosis, dementia, speech or language disorders in neurological diseases, addiction history, a suicide attempt recently or in the last 12 months, or insufficient language skills. The measuring instrument will be the VoiceSense digital voice analysis tool, which enables the analysis of 200 specific speech parameters, and the assessment of findings using psychometric instruments and questionnaires (SCL-90, PHQ, Big 5 Test). Results: The study is ongoing as of September 2019, but we have enrolled 254 participants. There have been 161 measurements completed at timepoint 1, and a total of 62 participants have completed every psychological and speech analysis measurement. Conclusions: It appears that the tone and modulation of speech are as important, if not more so, than the content, and should not be underestimated. This is particularly evident in the interpretation of the psychological findings thus far acquired. 
Therefore, the application of a software analysis tool could increase the accuracy of finding assessments and improve patient care. Trial Registration: ClinicalTrials.gov NCT03700008; https://clinicaltrials.gov/ct2/show/NCT03700008 International Registered Report Identifier (IRRID): PRR1-10.2196/13852 ", doi="10.2196/13852", url="https://www.researchprotocols.org/2020/5/e13852", url="http://www.ncbi.nlm.nih.gov/pubmed/32406862" } @Article{info:doi/10.2196/15767, author="Chen, Tao and Chen, Ye and Yuan, Mengxue and Gerstein, Mark and Li, Tingyu and Liang, Huiying and Froehlich, Tanya and Lu, Long", title="The Development of a Practical Artificial Intelligence Tool for Diagnosing and Evaluating Autism Spectrum Disorder: Multicenter Study", journal="JMIR Med Inform", year="2020", month="May", day="8", volume="8", number="5", pages="e15767", keywords="autism spectrum disorder", keywords="magnetic resonance imaging", keywords="neuroimaging", keywords="brain", keywords="histogram of oriented gradients", keywords="cluster analysis", keywords="classification", keywords="machine learning", abstract="Background: Autism spectrum disorder (ASD) is a complex neurodevelopmental disorder with an unknown etiology. Early diagnosis and intervention are key to improving outcomes for patients with ASD. Structural magnetic resonance imaging (sMRI) has been widely used in clinics to facilitate the diagnosis of brain diseases such as brain tumors. However, sMRI is less frequently used to investigate neurological and psychiatric disorders, such as ASD, owing to the subtle, if any, anatomical changes of the brain. Objective: This study aimed to investigate the possibility of identifying structural patterns in the brain of patients with ASD as potential biomarkers in the diagnosis and evaluation of ASD in clinics. 
Methods: We developed a novel 2-level histogram-based morphometry (HBM) classification framework in which an algorithm based on a 3D version of the histogram of oriented gradients (HOG) was used to extract features from sMRI data. We applied this framework to distinguish patients with ASD from healthy controls using 4 datasets from the second edition of the Autism Brain Imaging Data Exchange, including the ETH Z{\"u}rich (ETH), NYU Langone Medical Center: Sample 1, Oregon Health and Science University, and Stanford University (SU) sites. We used a stratified 10-fold cross-validation method to evaluate the model performance, and we applied the Naive Bayes approach to identify the predictive ASD-related brain regions based on classification contributions of each HOG feature. Results: On the basis of the 3D HOG feature extraction method, our proposed HBM framework achieved an area under the curve (AUC) of >0.75 in each dataset, with the highest AUC of 0.849 in the ETH site. We compared the 3D HOG algorithm with the original 2D HOG algorithm, which showed an accuracy improvement of >4\% in each dataset, with the highest improvement of 14\% (6/42) in the SU site. A comparison of the 3D HOG algorithm with the scale-invariant feature transform algorithm showed an AUC improvement of >18\% in each dataset. Furthermore, we identified ASD-related brain regions based on the sMRI images. Some of these regions (eg, frontal gyrus, temporal gyrus, cingulate gyrus, postcentral gyrus, precuneus, caudate, and hippocampus) are known to be implicated in ASD in prior neuroimaging literature. We also identified less well-known regions that may play unrecognized roles in ASD and be worth further investigation. Conclusions: Our research suggested that it is possible to identify neuroimaging biomarkers that can distinguish patients with ASD from healthy controls based on the more cost-effective sMRI images of the brain. 
We also demonstrated the potential of applying data-driven artificial intelligence technology in the clinical setting of neurological and psychiatric disorders, which usually harbor subtle anatomical changes in the brain that are often invisible to the human eye. ", doi="10.2196/15767", url="https://medinform.jmir.org/2020/5/e15767", url="http://www.ncbi.nlm.nih.gov/pubmed/32041690" } @Article{info:doi/10.2196/16854, author="Rawtaer, Iris and Mahendran, Rathi and Kua, Heok Ee and Tan, Pink Hwee and Tan, Xian Hwee and Lee, Tih-Shih and Ng, Pin Tze", title="Early Detection of Mild Cognitive Impairment With In-Home Sensors to Monitor Behavior Patterns in Community-Dwelling Senior Citizens in Singapore: Cross-Sectional Feasibility Study", journal="J Med Internet Res", year="2020", month="May", day="5", volume="22", number="5", pages="e16854", keywords="dementia", keywords="neurocognitive disorder", keywords="pattern recognition, automated/methods", keywords="internet of things", keywords="early diagnosis", abstract="Background: Dementia is a global epidemic and incurs substantial burden on the affected families and the health care system. A window of opportunity for intervention is the predementia stage known as mild cognitive impairment (MCI). Individuals often present to services late in the course of their disease and more needs to be done for early detection; sensor technology is a potential method for detection. Objective: The aim of this cross-sectional study was to establish the feasibility and acceptability of utilizing sensors in the homes of senior citizens to detect changes in behaviors unobtrusively. Methods: We recruited 59 community-dwelling seniors (aged >65 years who live alone) with and without MCI and observed them over the course of 2 months. The frequency of forgetfulness was monitored by tagging personal items and tracking missed doses of medication. 
Activities such as step count, time spent away from home, television use, sleep duration, and quality were tracked with passive infrared motion sensors, smart plugs, bed sensors, and a wearable activity band. Measures of cognition, depression, sleep, and social connectedness were also administered. Results: Of the 49 participants who completed the study, 28 had MCI and 21 had healthy cognition (HC). Frequencies of various sensor-derived behavior metrics were computed and compared between MCI and HC groups. MCI participants were less active than their HC counterparts and had more sleep interruptions per night. MCI participants had forgotten their medications more times per month compared with HC participants. The sensor system was acceptable to over 80\% (40/49) of study participants, with many requesting for permanent installation of the system. Conclusions: We demonstrated that it was both feasible and acceptable to set up these sensors in the community and unobtrusively collect data. Further studies evaluating such digital biomarkers in the homes in the community are needed to improve the ecological validity of sensor technology. We need to refine the system to yield more clinically impactful information. 
", doi="10.2196/16854", url="https://www.jmir.org/2020/5/e16854", url="http://www.ncbi.nlm.nih.gov/pubmed/32369031" } @Article{info:doi/10.2196/17497, author="Morton, Emma and Hou, HJ Sharon and Fogarty, Oonagh and Murray, Greg and Barnes, Steven and Depp, Colin and Michalak, Erin", title="A Web-Based Adaptation of the Quality of Life in Bipolar Disorder Questionnaire: Psychometric Evaluation Study", journal="JMIR Ment Health", year="2020", month="Apr", day="27", volume="7", number="4", pages="e17497", keywords="bipolar disorder", keywords="survey methodology", keywords="patient reported outcomes", keywords="psychometrics", keywords="questionnaire design", keywords="quality of life", keywords="validation studies", abstract="Background: Quality of life (QoL) is considered a key treatment outcome in bipolar disorder (BD) across research, clinical, and self-management contexts. Web-based assessment of patient-reported outcomes offer numerous pragmatic benefits but require validation to ensure measurement equivalency. A web-based version of the Quality of Life in Bipolar Disorder (QoL.BD) questionnaire was developed (QoL Tool). Objective: This study aimed to evaluate the psychometric properties of a web-based QoL self-report questionnaire for BD (QoL Tool). Key aims were to (1) characterize the QoL of the sample using the QoL Tool, (2) evaluate the internal consistency of the web-based measure, and (3) determine whether the factor structure of the original version of the QoL.BD instrument was replicated in the web-based instrument. Methods: Community-based participatory research methods were used to inform the development of a web-based adaptation of the QoL.BD instrument. Individuals with BD who registered for an account with the QoL Tool were able to opt in to sharing their data for research purposes. The distribution of scores and internal consistency estimates, as indicated by Cronbach alpha, were inspected. 
An exploratory factor analysis using maximum likelihood and oblique rotation was conducted. Inspection of the scree plot, eigenvalues, and minimum average partial correlation were used to determine the optimal factor structure to extract. Results: A total of 498 people with BD (349/498, 70.1\% female; mean age 39.64, SD 12.54 years; 181/498, 36.3\% BD type I; 195/498, 39.2\% BD type II) consented to sharing their QoL Tool data for the present study. Mean scores across the 14 QoL Tool domains were, in general, significantly lower than that of the original QoL.BD validation sample. Reliability estimates for QoL Tool domains were comparable with that observed for the QoL.BD instrument (Cronbach alpha=.70-.93). Exploratory factor analysis supported the extraction of an 11-factor model, with item loadings consistent with the factor structure suggested by the original study. Findings for the sleep and physical domains differed from the original study, with this analysis suggesting one shared latent construct. Conclusions: The psychometric properties of the web-based QoL Tool are largely concordant with the original pen-and-paper QoL.BD, although some minor differences in the structure of the sleep and physical domains were observed. Despite this small variation from the factor structure identified in the QoL.BD instrument, the latent factor structure of the QoL Tool largely reproduced the original findings and theoretical structure of QoL areas relevant to people with BD. These findings underscore the research and clinical utility of this instrument, but further comparison of the psychometric properties of the QoL Tool relative to the QoL.BD instrument is warranted. Future adaptations of the QoL Tool, including the production of an app-based version of the QoL Tool, are also discussed. 
", doi="10.2196/17497", url="http://mental.jmir.org/2020/4/e17497/", url="http://www.ncbi.nlm.nih.gov/pubmed/32338620" } @Article{info:doi/10.2196/16142, author="Lancaster, Claire and Koychev, Ivan and Blane, Jasmine and Chinner, Amy and Wolters, Leona and Hinds, Chris", title="Evaluating the Feasibility of Frequent Cognitive Assessment Using the Mezurio Smartphone App: Observational and Interview Study in Adults With Elevated Dementia Risk", journal="JMIR Mhealth Uhealth", year="2020", month="Apr", day="2", volume="8", number="4", pages="e16142", keywords="technology assessment", keywords="cognition", keywords="smartphone", keywords="mhealth", keywords="mobile phone", keywords="Alzheimer disease", keywords="early diagnosis", keywords="feasibility study", keywords="ecological momentary assessment", abstract="Background: By enabling frequent, sensitive, and economic remote assessment, smartphones will facilitate the detection of early cognitive decline at scale. Previous studies have sustained participant engagement with remote cognitive assessment over a week; extending this to a period of 1 month clearly provides a greater opportunity for measurement. However, as study durations are increased, the need to understand how participant burden and scientific value might be optimally balanced also increases. Objective: This study explored the little but often approach to assessment employed by the Mezurio app when prompting participants to interact every day for over a month. Specifically, this study aimed to understand whether this extended duration of remote study is feasible, and which factors promote sustained participant engagement over such periods. Methods: A total of 35 adults (aged 40-59 years) with no diagnosis of cognitive impairment were prompted to interact with the Mezurio smartphone app platform for up to 36 days, completing short, daily episodic memory tasks in addition to optional executive function and language tests. 
A subset (n=20) of participants completed semistructured interviews focused on their experience of using the app. Results: Participants complied with 80\% of the daily learning tasks scheduled for subsequent tests of episodic memory, with 88\% of participants still actively engaged by the final task. A thematic analysis of the participants' experiences highlighted schedule flexibility, a clear user interface, and performance feedback as important considerations for engagement with remote digital assessment. Conclusions: Despite the extended study duration, participants demonstrated high compliance with the schedule of daily learning tasks and were extremely positive about their experiences. Long durations of remote digital interaction are therefore definitely feasible but only when careful attention is paid to the design of the users' experience. ", doi="10.2196/16142", url="https://mhealth.jmir.org/2020/4/e16142", url="http://www.ncbi.nlm.nih.gov/pubmed/32238339" } @Article{info:doi/10.2196/15553, author="Maulik, K. Pallab and Devarapalli, Siddhardha and Kallakuri, Sudha and Bhattacharya, Amritendu and Peiris, David and Patel, Anushka", title="The Systematic Medical Appraisal Referral and Treatment Mental Health Project: Quasi-Experimental Study to Evaluate a Technology-Enabled Mental Health Services Delivery Model Implemented in Rural India", journal="J Med Internet Res", year="2020", month="Feb", day="27", volume="22", number="2", pages="e15553", keywords="mental health services", keywords="mHealth", keywords="rural", keywords="India", keywords="mental disorders", keywords="primary health care", abstract="Background: Although around 10\% of Indians experience depression, anxiety, or alcohol use disorders, very few receive adequate mental health care, especially in rural communities. Stigma and limited availability of mental health services contribute to this treatment gap. 
The Systematic Medical Appraisal Referral and Treatment Mental Health project aimed to address this gap. Objective: This study aimed to evaluate the effectiveness of an intervention in increasing the use of mental health services and reducing depression and anxiety scores among individuals at high risk of common mental disorders. Methods: A before-after study was conducted from 2014 to 2019 in 12 villages in Andhra Pradesh, India. The intervention comprised a community antistigma campaign, with the training of lay village health workers and primary care doctors to identify and manage individuals with stress, depression, and suicide risk using an electronic clinical decision support system. Results: In total, 900 of 22,046 (4.08\%) adults screened by health workers had increased stress, depression, or suicide risk and were referred to a primary care doctor. At follow-up, 731 out of 900 (81.2\%) reported visiting the doctor for their mental health symptoms, compared with 3.3\% (30/900) at baseline (odds ratio 133.3, 95\% CI 89.0 to 199.7; P<.001). Mean depression and anxiety scores were significantly lower postintervention compared with baseline from 13.4 to 3.1 (P<.001) and from 12.9 to 1.9 (P<.001), respectively. Conclusions: The intervention was associated with a marked increase in service uptake and clinically important reductions in depression and anxiety symptom scores. This will be further evaluated in a large-scale cluster randomized controlled trial. 
", doi="10.2196/15553", url="http://www.jmir.org/2020/2/e15553/", url="http://www.ncbi.nlm.nih.gov/pubmed/32130125" } @Article{info:doi/10.2196/12503, author="Mei, Guang and Xu, Weisheng and Li, Li and Zhao, Zhen and Li, Hao and Liu, Wenqing and Jiao, Yueming", title="The Role of Campus Data in Representing Depression Among College Students: Exploratory Research", journal="JMIR Ment Health", year="2020", month="Jan", day="27", volume="7", number="1", pages="e12503", keywords="depression", keywords="mental health", keywords="behavior analysis", abstract="Background: Depression is a predominant feature of many psychological problems leading to extreme behaviors and, in some cases, suicide. Campus information systems keep detailed and reliable student behavioral data; however, whether these data can reflect depression and we know the differences in behavior between depressive and nondepressive students are still research problems. Objective: The purpose of this paper is to investigate the behavioral patterns of depressed students by using multisource campus data and exploring the link between behavioral preferences and depressive symptoms. The campus data described in this paper include basic personal information, academic performance, poverty subsidy, consumption habit, daily routine, library behavior, and meal habit, totaling 121 features. Methods: To identify potentially depressive students, we developed an online questionnaire system based on a standard psychometric instrument, the Zung Self-Rating Depression Scale (SDS). To explore the differences in behavior of depressive and nondepressive students, the Mann-Whitney U test was applied. In order to investigate the behavioral features of different depressive symptoms, factor analysis was used to divide the questionnaire items into different symptom groups and then correlation analysis was employed to study the extrinsic characteristics of each depressive symptom. 
Results: The correlation between these factors and the features were computed. The results indicated that there were 25 features correlated with either 4 factors or SDS score. The statistical results indicated that depressive students were more likely to fail exams, have poor meal habits, have increased night activities and decreased morning activities, and engage less in social activities (eg, avoiding meal times with friends). Correlation analysis showed that the somatic factor 2 (F4) was negatively correlated with the number of library visits (r=--.179, P<.001), and, compared with other factors, had the greatest impact on students' daily schedule, eating and social habits. The biggest influencing factor to poor academic performance was cognitive factor F1, and its score was found to be significantly positively correlated with fail rate (r=.185, P=.02). Conclusions: The results presented in this study indicate that campus data can reflect depression and its symptoms. By collecting a large amount of questionnaire data and combining machine learning algorithms, it is possible to realize an identification method of depression and depressive symptoms based on campus data. 
", doi="10.2196/12503", url="http://mental.jmir.org/2020/1/e12503/", url="http://www.ncbi.nlm.nih.gov/pubmed/32012070" } @Article{info:doi/10.2196/14108, author="Moon, Jae Sun and Hwang, Jinseub and Kana, Rajesh and Torous, John and Kim, Won Jung", title="Accuracy of Machine Learning Algorithms for the Diagnosis of Autism Spectrum Disorder: Systematic Review and Meta-Analysis of Brain Magnetic Resonance Imaging Studies", journal="JMIR Ment Health", year="2019", month="Dec", day="20", volume="6", number="12", pages="e14108", keywords="autism spectrum disorder", keywords="machine learning", keywords="sensitivity and specificity", keywords="systematic review", keywords="meta-analysis", abstract="Background: In the recent years, machine learning algorithms have been more widely and increasingly applied in biomedical fields. In particular, their application has been drawing more attention in the field of psychiatry, for instance, as diagnostic tests/tools for autism spectrum disorder (ASD). However, given their complexity and potential clinical implications, there is an ongoing need for further research on their accuracy. Objective: This study aimed to perform a systematic review and meta-analysis to summarize the available evidence for the accuracy of machine learning algorithms in diagnosing ASD. Methods: The following databases were searched on November 28, 2018: MEDLINE, EMBASE, CINAHL Complete (with Open Dissertations), PsycINFO, and Institute of Electrical and Electronics Engineers Xplore Digital Library. Studies that used a machine learning algorithm partially or fully for distinguishing individuals with ASD from control subjects and provided accuracy measures were included in our analysis. The bivariate random effects model was applied to the pooled data in a meta-analysis. A subgroup analysis was used to investigate and resolve the source of heterogeneity between studies. 
True-positive, false-positive, false-negative, and true-negative values from individual studies were used to calculate the pooled sensitivity and specificity values, draw Summary Receiver Operating Characteristics curves, and obtain the area under the curve (AUC) and partial AUC (pAUC). Results: A total of 43 studies were included for the final analysis, of which a meta-analysis was performed on 40 studies (53 samples with 12,128 participants). A structural magnetic resonance imaging (sMRI) subgroup meta-analysis (12 samples with 1776 participants) showed a sensitivity of 0.83 (95\% CI 0.76-0.89), a specificity of 0.84 (95\% CI 0.74-0.91), and AUC/pAUC of 0.90/0.83. A functional magnetic resonance imaging/deep neural network subgroup meta-analysis (5 samples with 1345 participants) showed a sensitivity of 0.69 (95\% CI 0.62-0.75), specificity of 0.66 (95\% CI 0.61-0.70), and AUC/pAUC of 0.71/0.67. Conclusions: The accuracy of machine learning algorithms for diagnosis of ASD was considered acceptable by few accuracy measures only in cases of sMRI use; however, given the many limitations indicated in our study, further well-designed studies are warranted to extend the potential use of machine learning algorithms to clinical settings. 
Trial Registration: PROSPERO CRD42018117779; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=117779 ", doi="10.2196/14108", url="https://mental.jmir.org/2019/12/e14108", url="http://www.ncbi.nlm.nih.gov/pubmed/31562756" } @Article{info:doi/10.2196/11643, author="Ferreri, Florian and Bourla, Alexis and Peretti, Charles-Siegfried and Segawa, Tomoyuki and Jaafari, Nemat and Mouchabac, St{\'e}phane", title="How New Technologies Can Improve Prediction, Assessment, and Intervention in Obsessive-Compulsive Disorder (e-OCD): Review", journal="JMIR Ment Health", year="2019", month="Dec", day="10", volume="6", number="12", pages="e11643", keywords="obsessive-compulsive disorder", keywords="ecological momentary assessment", keywords="biofeedback", keywords="digital biomarkers", keywords="digital phenotyping", keywords="mobile health", keywords="virtual reality", keywords="machine learning", abstract="Background: New technologies are set to profoundly change the way we understand and manage psychiatric disorders, including obsessive-compulsive disorder (OCD). Developments in imaging and biomarkers, along with medical informatics, may well allow for better assessments and interventions in the future. Recent advances in the concept of digital phenotype, which involves using computerized measurement tools to capture the characteristics of a given psychiatric disorder, is one paradigmatic example. Objective: The impact of new technologies on health professionals' practice in OCD care remains to be determined. Recent developments could disrupt not just their clinical practices, but also their beliefs, ethics, and representations, even going so far as to question their professional culture. This study aimed to conduct an extensive review of new technologies in OCD. 
Methods: We conducted the review by looking for titles in the PubMed database up to December 2017 that contained the following terms: [Obsessive] AND [Smartphone] OR [phone] OR [Internet] OR [Device] OR [Wearable] OR [Mobile] OR [Machine learning] OR [Artificial] OR [Biofeedback] OR [Neurofeedback] OR [Momentary] OR [Computerized] OR [Heart rate variability] OR [actigraphy] OR [actimetry] OR [digital] OR [virtual reality] OR [Tele] OR [video]. Results: We analyzed 364 articles, of which 62 were included. Our review was divided into 3 parts: prediction, assessment (including diagnosis, screening, and monitoring), and intervention. Conclusions: The review showed that the place of connected objects, machine learning, and remote monitoring has yet to be defined in OCD. Smartphone assessment apps and the Web Screening Questionnaire demonstrated good sensitivity and adequate specificity for detecting OCD symptoms when compared with a full-length structured clinical interview. The ecological momentary assessment procedure may also represent a worthy addition to the current suite of assessment tools. In the field of intervention, CBT supported by smartphone, internet, or computer may not be more effective than that delivered by a qualified practitioner, but it is easy to use, well accepted by patients, reproducible, and cost-effective. Finally, new technologies are enabling the development of new therapies, including biofeedback and virtual reality, which focus on the learning of coping skills. For them to be used, these tools must be properly explained and tailored to individual physician and patient profiles. 
", doi="10.2196/11643", url="https://mental.jmir.org/2019/12/e11643", url="http://www.ncbi.nlm.nih.gov/pubmed/31821153" } @Article{info:doi/10.2196/13911, author="Thabrew, Hiran and D'Silva, Simona and Darragh, Margot and Goldfinch, Mary and Meads, Jake and Goodyear-Smith, Felicity", title="Comparison of YouthCHAT, an Electronic Composite Psychosocial Screener, With a Clinician Interview Assessment for Young People: Randomized Trial", journal="J Med Internet Res", year="2019", month="Dec", day="3", volume="21", number="12", pages="e13911", keywords="mass screening", keywords="adolescents", keywords="anxiety", keywords="depression", keywords="substance-related disorders", keywords="primary health care", keywords="school health services", keywords="eHealth", abstract="Background: Psychosocial problems such as depression, anxiety, and substance abuse are common and burdensome in young people. In New Zealand, screening for such problems is undertaken routinely only with year 9 students in low-decile schools and opportunistically in pediatric settings using a nonvalidated and time-consuming clinician-administered Home, Education, Eating, Activities, Drugs and Alcohol, Sexuality, Suicide and Depression, Safety (HEEADSSS) interview. The Youth version, Case-finding and Help Assessment Tool (YouthCHAT) is a relatively new, locally developed, electronic tablet--based composite screener for identifying similar psychosocial issues to HEEADSSS. Objective: This study aimed to compare the performance and acceptability of YouthCHAT with face-to-face HEEADSSS assessment among 13-year-old high school students. Methods: A counterbalanced randomized trial of YouthCHAT screening either before or after face-to-face HEEADSSS assessment was undertaken with 129 13-year-old New Zealand high school students of predominantly M{\=a}ori and Pacific Island ethnicity. 
Main outcome measures were comparability of YouthCHAT and HEEADSSS completion times, detection rates, and acceptability to students and school nurses. Results: YouthCHAT screening was more than twice as fast as HEEADSSS assessment (mean 8.57 min vs mean 17.22 min; mean difference 8 min 25 seconds [range 6 min 20 seconds to 11 min 10 seconds]; P<.01) and detected more issues overall on comparable domains. For substance misuse and problems at home, both instruments were roughly comparable. YouthCHAT detected significantly more problems with eating or body image perception (70/110, 63.6\% vs 25/110, 22.7\%; P<.01), sexual health (24/110, 21.8\% vs 10/110, 9.1\%; P=.01), safety (65/110, 59.1\% vs 17/110, 15.5\%; P<.01), and physical inactivity (43/110, 39.1\% vs 21/110, 19.1\%; P<.01). HEEADSSS had a greater rate of detection for a broader set of mental health issues (30/110, 27\%) than YouthCHAT (11/110, 10\%; P=.001), which only assessed clinically relevant anxiety and depression. Assessment order made no significant difference to the duration of assessment or to the rates of YouthCHAT-detected positive screens for anxiety and depression. There were no significant differences in student acceptability survey results between the two assessments. Nurses identified that students found YouthCHAT easy to answer and that it helped students answer face-to-face questions, especially those of a sensitive nature. Difficulties encountered with YouthCHAT included occasional Wi-Fi connectivity and student literacy issues. Conclusions: This study provides preliminary evidence regarding the shorter administration time, detection rates, and acceptability of YouthCHAT as a school-based psychosocial screener for young people. 
Although further research is needed to confirm its effectiveness in other age and ethnic groups, YouthCHAT shows promise for aiding earlier identification and treatment of common psychosocial problems in young people, including possible use as part of an annual, school-based, holistic health check. Trial Registration: Australian New Zealand Clinical Trials Network Registry (ACTRN) ACTRN12616001243404p; https://www.anzctr.org.au/Trial/Registration/TrialReview.aspx?id=371422. ", doi="10.2196/13911", url="https://www.jmir.org/2019/12/e13911", url="http://www.ncbi.nlm.nih.gov/pubmed/31793890" } @Article{info:doi/10.2196/12942, author="Ford, Elizabeth and Curlewis, Keegan and Wongkoblap, Akkapon and Curcin, Vasa", title="Public Opinions on Using Social Media Content to Identify Users With Depression and Target Mental Health Care Advertising: Mixed Methods Survey", journal="JMIR Ment Health", year="2019", month="Nov", day="13", volume="6", number="11", pages="e12942", keywords="social media", keywords="depression", keywords="mental health", keywords="machine learning", keywords="public opinion", keywords="social license", keywords="survey", abstract="Background: Depression is a common disorder that still remains underdiagnosed and undertreated in the UK National Health Service. Charities and voluntary organizations offer mental health services, but they are still struggling to promote these services to the individuals who need them. By analyzing social media (SM) content using machine learning techniques, it may be possible to identify which SM users are currently experiencing low mood, thus enabling the targeted advertising of mental health services to the individuals who would benefit from them. Objective: This study aimed to understand SM users' opinions of analysis of SM content for depression and targeted advertising on SM for mental health services. 
Methods: A Web-based, mixed methods, cross-sectional survey was administered to SM users aged 16 years or older within the United Kingdom. It asked participants about their demographics, their usage of SM, and their history of depression and presented structured and open-ended questions on views of SM content being analyzed for depression and views on receiving targeted advertising for mental health services. Results: A total of 183 participants completed the survey, and 114 (62.3\%) of them had previously experienced depression. Participants indicated that they posted less during low moods, and they believed that their SM content would not reflect their depression. They could see the possible benefits of identifying depression from SM content but did not believe that the risks to privacy outweighed these benefits. A majority of the participants would not provide consent for such analysis to be conducted on their data and considered it to be intrusive and exposing. Conclusions: In a climate of distrust of SM platforms' usage of personal data, participants in this survey did not perceive that the benefits of targeting advertisements for mental health services to individuals analyzed as having depression would outweigh the risks to privacy. Future work in this area should proceed with caution and should engage stakeholders at all stages to maximize the transparency and trustworthiness of such research endeavors. 
", doi="10.2196/12942", url="http://mental.jmir.org/2019/11/e12942/", url="http://www.ncbi.nlm.nih.gov/pubmed/31719022" } @Article{info:doi/10.2196/13863, author="Jungmann, Maria Stefanie and Klan, Timo and Kuhn, Sebastian and Jungmann, Florian", title="Accuracy of a Chatbot (Ada) in the Diagnosis of Mental Disorders: Comparative Case Study With Lay and Expert Users", journal="JMIR Form Res", year="2019", month="Oct", day="29", volume="3", number="4", pages="e13863", keywords="artificial intelligence", keywords="eHealth", keywords="mental disorders", keywords="mHealth", keywords="screening", keywords="(mobile) app", keywords="diagnostic", abstract="Background: Health apps for the screening and diagnosis of mental disorders have emerged in recent years on various levels (eg, patients, practitioners, and public health system). However, the diagnostic quality of these apps has not been (sufficiently) tested so far. Objective: The objective of this pilot study was to investigate the diagnostic quality of a health app for a broad spectrum of mental disorders and its dependency on expert knowledge. Methods: Two psychotherapists, two psychology students, and two laypersons each read 20 case vignettes with a broad spectrum of mental disorders. They used a health app (Ada---Your Health Guide) to get a diagnosis by entering the symptoms. Interrater reliabilities were computed between the diagnoses of the case vignettes and the results of the app for each user group. Results: Overall, there was a moderate diagnostic agreement (kappa=0.64) between the results of the app and the case vignettes for mental disorders in adulthood and a low diagnostic agreement (kappa=0.40) for mental disorders in childhood and adolescence. When psychotherapists applied the app, there was a good diagnostic agreement (kappa=0.78) regarding mental disorders in adulthood. The diagnostic agreement was moderate (kappa=0.55/0.60) for students and laypersons. 
For mental disorders in childhood and adolescence, a moderate diagnostic quality was found when psychotherapists (kappa=0.53) and students (kappa=0.41) used the app, whereas the quality was low for laypersons (kappa=0.29). On average, the app required 34 questions to be answered and 7 min to complete. Conclusions: The health app investigated here can represent an efficient diagnostic screening or help function for mental disorders in adulthood and has the potential to support especially diagnosticians in their work in various ways. The results of this pilot study provide a first indication that the diagnostic accuracy is user dependent and improvements in the app are needed especially for mental disorders in childhood and adolescence. ", doi="10.2196/13863", url="http://formative.jmir.org/2019/4/e13863/", url="http://www.ncbi.nlm.nih.gov/pubmed/31663858" } @Article{info:doi/10.2196/13610, author="Jin, Haomiao and Wu, Shinyi", title="Use of Patient-Reported Data to Match Depression Screening Intervals With Depression Risk Profiles in Primary Care Patients With Diabetes: Development and Validation of Prediction Models for Major Depression", journal="JMIR Form Res", year="2019", month="Oct", day="1", volume="3", number="4", pages="e13610", keywords="patient-reported data", keywords="patient-centered decision making", keywords="depression screening", keywords="depression", keywords="diabetes", keywords="health information technology", keywords="data analytics", keywords="predictive modeling", keywords="machine learning", keywords="data mining", abstract="Background: Clinical guidelines recommend screening for depression in the general adult population but recognizes that the optimum interval for screening is unknown. Ideal screening intervals should match the patient risk profiles. 
Objective: This study describes a predictive analytics approach for mining clinical and patient-reported data from a large clinical study for the identification of primary care patients at high risk for depression to match depression screening intervals with patient risk profiles. Methods: This paper analyzed data from a large safety-net primary care study for diabetes and depression. A regression-based data mining technique was used to examine 53 demographics, clinical variables, and patient-reported variables to develop three prediction models for major depression at 6, 12, and 18 months from baseline. Predictors with the strongest predictive power that require low information collection efforts were selected to develop the prediction models. Predictive accuracy was measured by the area under the receiver operating curve (AUROC) and was evaluated by 10-fold cross-validation. The effectiveness of the prediction algorithms in supporting clinical decision making for six ``typical'' types of patients was demonstrated. Results: The analysis included 923 patients who were nondepressed at the study baseline. Five patient-reported variables were selected in the prediction models to predict major depression at 6, 12, and 18 months: (1) Patient Health Questionnaire 2-item score; (2) the Sheehan Disability Scale; (3) previous problems with depression; (4) the diabetes symptoms scale; and (5) emotional burden of diabetes. All three depression prediction models had an AUROC>0.80, comparable with published depression prediction studies. Among the 6 ``typical'' types of patients, the algorithms suggest that patients who reported impaired daily functioning by health status are at an elevated risk for depression in all three periods. Conclusions: This study demonstrated that leveraging patient-reported data and prediction models can help improve identification of high-risk patients and clinical decisions about the depression screening interval for diabetes patients. 
Implementation of this approach can be coupled with application of modern technologies such as telehealth and mobile health assessment for collecting patient-reported data to improve privacy, reducing stigma and costs, and promoting a personalized depression screening that matches screening intervals with patient risk profiles. ", doi="10.2196/13610", url="https://formative.jmir.org/2019/4/e13610", url="http://www.ncbi.nlm.nih.gov/pubmed/31573900" } @Article{info:doi/10.2196/12051, author="Ramos, Munoz Roann and Cheng, Ferrer Paula Glenda and Jonas, Michael Stephan", title="Validation of an mHealth App for Depression Screening and Monitoring (Psychologist in a Pocket): Correlational Study and Concurrence Analysis", journal="JMIR Mhealth Uhealth", year="2019", month="Sep", day="16", volume="7", number="9", pages="e12051", keywords="mobile health", keywords="depression", keywords="validation", keywords="Psychologist in a Pocket", keywords="PiaP", abstract="Background: Mobile health (mHealth) is a fast-growing professional sector. As of 2016, there were more than 259,000 mHealth apps available internationally. Although mHealth apps are growing in acceptance, relatively little attention and limited efforts have been invested to establish their scientific integrity through statistical validation. This paper presents the external validation of Psychologist in a Pocket (PiaP), an Android-based mental mHealth app which supports traditional approaches in depression screening and monitoring through the analysis of electronic text inputs in communication apps. Objective: The main objectives of the study were (1) to externally validate the construct of the depression lexicon of PiaP with standardized psychological paper-and-pencil tools and (2) to determine the comparability of PiaP, a new depression measure, with a psychological gold standard in identifying depression. Methods: College participants downloaded PiaP for a 2-week administration. 
Afterward, they were asked to complete 4 psychological depression instruments. Furthermore, 1-week and 2-week PiaP total scores (PTS) were correlated with (1) Beck Depression Index (BDI)-II and Center for Epidemiological Studies--Depression (CES-D) Scale for congruent construct validation, (2) Affect Balance Scale (ABS)--Negative Affect for convergent construct validation, and (3) Satisfaction With Life Scale (SWLS) and ABS--Positive Affect for divergent construct validation. In addition, concordance analysis between PiaP and BDI-II was performed. Results: On the basis of the Pearson product-moment correlation, significant positive correlations exist between (1) 1-week PTS and CES-D Scale, (2) 2-week PTS and BDI-II, and (3) PiaP 2-week PTS and SWLS. Concordance analysis (Bland-Altman plot and analysis) suggested that PiaP's approach to depression screening is comparable with the gold standard (BDI-II). Conclusions: The evaluation of mental health has historically relied on subjective measurements. With the integration of novel approaches using mobile technology (and, by extension, mHealth apps) in mental health care, the validation process becomes more compelling to ensure their accuracy and credibility. This study suggests that PiaP's approach to depression screening by analyzing electronic data is comparable with traditional and well-established depression instruments and can be used to augment the process of measuring depression symptoms. 
", doi="10.2196/12051", url="https://mhealth.jmir.org/2019/9/e12051/", url="http://www.ncbi.nlm.nih.gov/pubmed/31538946" } @Article{info:doi/10.2196/14657, author="Chung, Kyungmi and Park, Young Jin and Joung, DaYoung and Jhung, Kyungun", title="Response Time as an Implicit Self-Schema Indicator for Depression Among Undergraduate Students: Preliminary Findings From a Mobile App--Based Depression Assessment", journal="JMIR Mhealth Uhealth", year="2019", month="Sep", day="13", volume="7", number="9", pages="e14657", keywords="depressive symptoms", keywords="response time", keywords="self-concept", keywords="mobile phone", keywords="mobile apps", keywords="diagnostic screening programs", keywords="self-assessment", keywords="treatment adherence", keywords="compliance", abstract="Background: Response times to depressive symptom items in a mobile-based depression screening instrument has potential as an implicit self-schema indicator for depression but has yet to be determined; the instrument was designed to readily record depressive symptoms experienced on a daily basis. In this study, the well-validated Korean version of the Center for Epidemiologic Studies Depression Scale-Revised (K-CESD-R) was adopted. Objective: The purpose of this study was to investigate the relationship between depression severity (ie, explicit measure: total K-CESD-R Mobile scores) and the latent trait of interest in schematic self-referent processing of depressive symptom items (ie, implicit measure: response times to items in the K-CESD-R Mobile scale). The purpose was to investigate this relationship among undergraduate students who had never been diagnosed with, but were at risk for, major depressive disorder (MDD) or comorbid MDD with other neurological or psychiatric disorders. 
Methods: A total of 70 participants---36 males (51\%) and 34 females (49\%)---aged 19-29 years (mean 22.66, SD 2.11), were asked to complete both mobile and standard K-CESD-R assessments via their own mobile phones. The mobile K-CESD-R sessions (binary scale: yes or no) were administered on a daily basis for 2 weeks. The standard K-CESD-R assessment (5-point scale) was administered on the final day of the 2-week study period; the assessment was delivered via text message, including a link to the survey, directly to participants' mobile phones. Results: A total of 5 participants were excluded from data analysis. The result of polynomial regression analysis showed that the relationship between total K-CESD-R Mobile scores and the reaction times to the depressive symptom items was better explained by a quadratic trend---F (2, 62)=21.16, P<.001, R$^2$=.41---than by a linear trend---F (1, 63)=25.43, P<.001, R$^2$=.29. It was further revealed that the K-CESD-R Mobile app had excellent internal consistency (Cronbach alpha=.94); at least moderate concurrent validity with other depression scales, such as the Korean version of the Quick Inventory for Depressive Symptomatology-Self Report ($\rho$=.38, P=.002) and the Patient Health Questionnaire-9 ($\rho$=.48, P<.001); a high adherence rate for all participants (65/70, 93\%); and a high follow-up rate for 10 participants whose mobile or standard K-CESD-R score was 13 or greater (8/10, 80\%). Conclusions: As hypothesized, based on a self-schema model for depression that represented both item and person characteristics, the inverted U-shaped relationship between the explicit and implicit self-schema measures for depression showed the potential of an organizational breakdown; this also showed the potential for a subsequent return to efficient processing of schema-consistent information along a continuum, ranging from nondepression through mild depression to severe depression. 
Further, it is expected that the updated K-CESD-R Mobile app can play an important role in encouraging people at risk for depression to seek professional follow-up for mental health care. ", doi="10.2196/14657", url="https://mhealth.jmir.org/2019/9/e14657/", url="http://www.ncbi.nlm.nih.gov/pubmed/31586362" } @Article{info:doi/10.2196/13887, author="Ijaz, Kiran and Ahmadpour, Naseem and Naismith, L. Sharon and Calvo, A. Rafael", title="An Immersive Virtual Reality Platform for Assessing Spatial Navigation Memory in Predementia Screening: Feasibility and Usability Study", journal="JMIR Ment Health", year="2019", month="Sep", day="03", volume="6", number="9", pages="e13887", keywords="virtual reality", keywords="healthy aging", keywords="memory", keywords="cognition", keywords="dementia", abstract="Background: Traditional methods for assessing memory are expensive and have high administrative costs. Memory assessment is important for establishing cognitive impairment in cases such as detecting dementia in older adults. Virtual reality (VR) technology can assist in establishing better quality outcome in such crucial screening by supporting the well-being of individuals and offering them an engaging, cognitively challenging task that is not stressful. However, unmet user needs can compromise the validity of the outcome. Therefore, screening technology for older adults must address their specific design and usability requirements. Objective: This study aimed to design and evaluate the feasibility of an immersive VR platform to assess spatial navigation memory in older adults and establish its compatibility by comparing the outcome to a standard screening platform on a personal computer (PC). Methods: VR-CogAssess is a platform integrating an Oculus Rift head-mounted display and immersive photorealistic imagery. 
In a pilot study with healthy older adults (N=42; mean age 73.22 years, SD 9.26), a landmark recall test was conducted, and assessment on the VR-CogAssess was compared against a standard PC (SPC) setup. Results: Results showed that participants in VR were significantly more engaged (P=.003), achieved higher landmark recall scores (P=.004), made less navigational mistakes (P=.04), and reported a higher level of presence (P=.002) than those in SPC setup. In addition, participants in VR indicated no significantly higher stress than SPC setup (P=.87). Conclusions: The study findings suggest immersive VR is feasible and compatible with SPC counterpart for spatial navigation memory assessment. The study provides a set of design guidelines for creating similar platforms in the future. ", doi="10.2196/13887", url="https://mental.jmir.org/2019/9/e13887/", url="http://www.ncbi.nlm.nih.gov/pubmed/31482851" } @Article{info:doi/10.2196/13271, author="Boyle, Anne Jacqueline and Willey, Suzanne and Blackmore, Rebecca and East, Christine and McBride, Jacqueline and Gray, Kylie and Melvin, Glenn and Fradkin, Rebecca and Ball, Natahl and Highet, Nicole and Gibson-Helm, Melanie", title="Improving Mental Health in Pregnancy for Refugee Women: Protocol for the Implementation and Evaluation of a Screening Program in Melbourne, Australia", journal="JMIR Res Protoc", year="2019", month="Aug", day="19", volume="8", number="8", pages="e13271", keywords="mental health", keywords="refugees", keywords="transients and migrants", keywords="pregnancy", keywords="prenatal care", keywords="mass screening", abstract="Background: Identifying mental health disorders in migrant and refugee women during pregnancy provides an opportunity for interventions that may benefit women and their families. Evidence suggests that perinatal mental health disorders impact mother-infant attachment at critical times, which can affect child development. 
Postnatal depression resulting in suicide is one of the leading causes of maternal mortality postpartum. Routine screening of perinatal mental health is recommended to improve the identification of depression and anxiety and to facilitate early management. However, screening is poorly implemented into routine practice. This study is the first to investigate routine screening for perinatal mental health in a maternity setting designed for refugee women. This study will determine whether symptoms of depression and anxiety are more likely to be detected by the screening program compared with routine care and will evaluate the screening program's feasibility and acceptability to women and health care providers (HCPs). Objective: The objectives of this study are (1) to assess if refugee women are more likely to screen risk-positive for depression and anxiety than nonrefugee women, using the Edinburgh Postnatal Depression Scale (EPDS); (2) to assess if screening in pregnancy using the EPDS enables better detection of symptoms of depression and anxiety in refugee women than current routine care; (3) to determine if a screening program for perinatal mental health in a maternity setting designed for refugee women is acceptable to women; and (4) to evaluate the feasibility and acceptability of the perinatal mental health screening program from the perspective of HCPs (including the barriers and enablers to implementation). Methods: This study uses an internationally recommended screening measure, the EPDS, and a locally developed psychosocial questionnaire, both administered in early pregnancy and again in the third trimester. These measures have been translated into the most common languages used by the women attending the clinic and are administered via an electronic platform (iCOPE). This platform automatically calculates the EPDS score and generates reports for the HCP and woman. 
A total of 119 refugee women and 155 nonrefugee women have been recruited to evaluate the screening program's ability to detect depression and anxiety symptoms and will be compared with 34 refugee women receiving routine care. A subsample of women will participate in a qualitative assessment of the screening program's acceptability and feasibility. Health service staff have been recruited to evaluate the integration of screening into maternity care. Results: The recruitment is complete, and data collection and analysis are underway. Conclusions: It is anticipated that screening will increase the identification and management of depression and anxiety symptoms in pregnancy. New information will be generated on how to implement such a program in feasible and acceptable ways that will improve health outcomes for refugee women. International Registered Report Identifier (IRRID): DERR1-10.2196/13271 ", doi="10.2196/13271", url="http://www.researchprotocols.org/2019/8/e13271/", url="http://www.ncbi.nlm.nih.gov/pubmed/31429411" } @Article{info:doi/10.2196/14821, author="Chua, Loon Sean Ing and Tan, Chuan Ngiap and Wong, Teen Wei and Allen Jr, Carson John and Quah, Min Joanne Hui and Malhotra, Rahul and {\O}stbye, Truls", title="Virtual Reality for Screening of Cognitive Function in Older Persons: Comparative Study", journal="J Med Internet Res", year="2019", month="Aug", day="01", volume="21", number="8", pages="e14821", keywords="virtual reality", keywords="feasibility studies", keywords="mental status and dementia tests", keywords="technology", keywords="video games", keywords="dementia", keywords="cognitive dysfunction", abstract="Background: The prevalence of dementia, which presents as cognitive decline in one or more cognitive domains affecting function, is increasing worldwide. Traditional cognitive screening tools for dementia have their limitations, with emphasis on memory and, to a lesser extent, on the cognitive domain of executive function. 
The use of virtual reality (VR) in screening for cognitive function in older persons is promising, but evidence for its use is sparse. Objective: The primary aim was to examine the feasibility and acceptability of using VR to screen for cognitive impairment in older persons in a primary care setting. The secondary aim was to assess the module's ability to discriminate between cognitively intact and cognitively impaired participants. Methods: A comparative study was conducted at a public primary care clinic in Singapore, where persons aged 65-85 years were recruited based on a cut-off score of 26 on the Montreal Cognitive Assessment (MoCA) scale. They participated in a VR module for assessment of their learning and memory, perceptual-motor function, and executive function. Each participant was evaluated by the total performance score (range: 0-700) upon completion of the study. A questionnaire was also administered to assess their perception of and attitude toward VR. Results: A total of 37 participants in Group 1 (cognitively intact; MoCA score$\geq$26) and 23 participants in Group 2 (cognitively impaired; MoCA score<26) were assessed. The mean time to completion of the study was 19.1 (SD 3.6) minutes in Group 1 and 20.4 (3.4) minutes in Group 2. Mean feedback scores ranged from 3.80 to 4.48 (max=5) in favor of VR. The total performance score in Group 1 (552.0, SD 57.2) was higher than that in Group 2 (476.1, SD 61.9; P<.001) and exhibited a moderate positive correlation with scores from other cognitive screening tools: Abbreviated Mental Test (0.312), Mini-Mental State Examination (0.373), and MoCA (0.427). A receiver operating characteristic curve analysis for the relationship between the total performance score and the presence of cognitive impairment showed an area under curve of 0.821 (95\% CI 0.714-0.928).
Conclusions: We demonstrated the feasibility of using a VR-based screening tool for cognitive function in older persons in primary care, who were largely in favor of this tool. ", doi="10.2196/14821", url="https://www.jmir.org/2019/8/e14821/", url="http://www.ncbi.nlm.nih.gov/pubmed/31373274" } @Article{info:doi/10.2196/13898, author="Hafiz, Pegah and Miskowiak, Woznica Kamilla and Kessing, Vedel Lars and Elleby Jespersen, Andreas and Obenhausen, Kia and Gulyas, Lorant and Zukowska, Katarzyna and Bardram, Eyvind Jakob", title="The Internet-Based Cognitive Assessment Tool: System Design and Feasibility Study", journal="JMIR Form Res", year="2019", month="Jul", day="26", volume="3", number="3", pages="e13898", keywords="screening", keywords="memory", keywords="executive function", keywords="bipolar disorder", keywords="depression", keywords="cognitive impairments", keywords="neuropsychological tests", keywords="computer software", keywords="speech recognition software", abstract="Background: Persistent cognitive impairment is prevalent in unipolar and bipolar disorders and is associated with decreased quality of life and psychosocial dysfunction. The screen for cognitive impairment in psychiatry (SCIP) test is a validated paper-and-pencil instrument for the assessment of cognition in affective disorders. However, there is no digital cognitive screening tool for the brief and accurate assessment of cognitive impairments in this patient group. Objective: In this paper, we present the design process and feasibility study of the internet-based cognitive assessment tool (ICAT) that is designed based on the cognitive tasks of the SCIP. The aims of this feasibility study were to perform the following tasks among healthy individuals: (1) evaluate the usability of the ICAT, (2) investigate the feasibility of the ICAT as a patient-administered cognitive assessment tool, and (3) examine the performance of automatic speech recognition (ASR) for the assessment of verbal recall. 
Methods: The ICAT was developed in a user-centered design process. The cognitive measures of the ICAT were immediate and delayed recall, working memory, and psychomotor speed. Usability and feasibility studies were conducted separately with 2 groups of healthy individuals (N=21 and N=19, respectively). ICAT tests were available in the English and Danish languages. The participants were asked to fill in the post study system usability questionnaire (PSSUQ) upon completing the ICAT test. Verbal recall in the ICAT was assessed using ASR, and the performance evaluation criterion was word error rate (WER). A Pearson 2-tailed correlation analysis significant at the .05 level was applied to investigate the association between the SCIP and ICAT scores. Results: The overall psychometric factors of PSSUQ for both studies gave scores above 4 (out of 5). The analysis of the feasibility study revealed a moderate to strong correlation between the total scores of the SCIP and ICAT (r=0.63; P=.009). There were also moderate to strong correlations between the SCIP and ICAT subtests for immediate verbal recall (r=0.67; P=.002) and psychomotor speed (r=0.71; P=.001). The associations between the respective subtests for working memory, executive function, and delayed recall, however, were not statistically significant. The corresponding WER for English and Danish responses were 17.8\% and 6.3\%, respectively. Conclusions: The ICAT is the first digital screening instrument modified from the SCIP using Web-based technology and ASR. There was good accuracy of the ASR for verbal memory assessment. The moderate correlation between the ICAT and SCIP scores suggests that the ICAT is a valid tool for assessing cognition, although this should be confirmed in a larger study with greater statistical power. 
Taken together, the ICAT seems to be a valid Web-based cognitive assessment tool that, after some minor modifications and further validation, may be used to screen for cognitive impairment in clinical settings. ", doi="10.2196/13898", url="http://formative.jmir.org/2019/3/e13898/", url="http://www.ncbi.nlm.nih.gov/pubmed/31350840" } @Article{info:doi/10.2196/13809, author="Kasthurirathne, N. Suranga and Biondich, G. Paul and Grannis, J. Shaun and Purkayastha, Saptarshi and Vest, R. Joshua and Jones, F. Josette", title="Identification of Patients in Need of Advanced Care for Depression Using Data Extracted From a Statewide Health Information Exchange: A Machine Learning Approach", journal="J Med Internet Res", year="2019", month="Jul", day="22", volume="21", number="7", pages="e13809", keywords="depression", keywords="supervised machine learning", keywords="delivery of health care", abstract="Background: As the most commonly occurring form of mental illness worldwide, depression poses significant health and economic burdens to both the individual and community. Different types of depression pose different levels of risk. Individuals who suffer from mild forms of depression may recover without any assistance or be effectively managed by primary care or family practitioners. However, other forms of depression are far more severe and require advanced care by certified mental health providers. However, identifying cases of depression that require advanced care may be challenging to primary care providers and health care team members whose skill sets run broad rather than deep. 
Objective: This study aimed to leverage a comprehensive range of patient-level diagnostic, behavioral, and demographic data, as well as past visit history data from a statewide health information exchange to build decision models capable of predicting the need of advanced care for depression across patients presenting at Eskenazi Health, the public safety net health system for Marion County, Indianapolis, Indiana. Methods: Patient-level diagnostic, behavioral, demographic, and past visit history data extracted from structured datasets were merged with outcome variables extracted from unstructured free-text datasets and were used to train random forest decision models that predicted the need of advanced care for depression across (1) the overall patient population and (2) various subsets of patients at higher risk for depression-related adverse events; patients with a past diagnosis of depression; patients with a Charlson comorbidity index of $\geq$1; patients with a Charlson comorbidity index of $\geq$2; and all unique patients identified across the 3 above-mentioned high-risk groups. Results: The overall patient population consisted of 84,317 adult (aged $\geq$18 years) patients. A total of 6992 (8.29\%) of these patients were in need of advanced care for depression. Decision models for high-risk patient groups yielded area under the curve (AUC) scores between 86.31\% and 94.43\%. The decision model for the overall patient population yielded a comparatively lower AUC score of 78.87\%. The variance of optimal sensitivity and specificity for all decision models, as identified using Youden J Index, is as follows: sensitivity=68.79\% to 83.91\% and specificity=76.03\% to 92.18\%.
Conclusions: This study demonstrates the ability to automate screening for patients in need of advanced care for depression across (1) an overall patient population or (2) various high-risk patient groups using structured datasets covering acute and chronic conditions, patient demographics, behaviors, and past visit history. Furthermore, these results show considerable potential to enable preventative care and can be easily integrated into existing clinical workflows to improve access to wraparound health care services. ", doi="10.2196/13809", url="http://www.jmir.org/2019/7/e13809/", url="http://www.ncbi.nlm.nih.gov/pubmed/31333196" } @Article{info:doi/10.2196/13946, author="Wshah, Safwan and Skalka, Christian and Price, Matthew", title="Predicting Posttraumatic Stress Disorder Risk: A Machine Learning Approach", journal="JMIR Ment Health", year="2019", month="Jul", day="22", volume="6", number="7", pages="e13946", keywords="PTSD", keywords="machine learning", keywords="predictive algorithms", abstract="Background: A majority of adults in the United States are exposed to a potentially traumatic event but only a handful go on to develop impairing mental health conditions such as posttraumatic stress disorder (PTSD). Objective: Identifying those at elevated risk shortly after trauma exposure is a clinical challenge. The aim of this study was to develop computational methods to more effectively identify at-risk patients and, thereby, support better early interventions. Methods: We proposed machine learning (ML) induction of models to automatically predict elevated PTSD symptoms in patients 1 month after a trauma, using self-reported symptoms from data collected via smartphones. Results: We show that an ensemble model accurately predicts elevated PTSD symptoms, with an area under the curve (AUC) of .85, using a bag of support vector machines, naive Bayes, logistic regression, and random forest algorithms. 
Furthermore, we show that only 7 self-reported items (features) are needed to obtain this AUC. Most importantly, we show that accurate predictions can be made 10 to 20 days posttrauma. Conclusions: These results suggest that simple smartphone-based patient surveys, coupled with automated analysis using ML-trained models, can identify those at risk for developing elevated PTSD symptoms and thus target them for early intervention. ", doi="10.2196/13946", url="http://mental.jmir.org/2019/7/e13946/", url="http://www.ncbi.nlm.nih.gov/pubmed/31333201" } @Article{info:doi/10.2196/13417, author="van Maurik, S. Ingrid and Visser, NC Leonie and Pel-Littel, E. Ruth and van Buchem, M. Marieke and Zwan, D. Marissa and Kunneman, Marleen and Pelkmans, Wiesje and Bouwman, H. Femke and Minkman, Mirella and Schoonenboom, Niki and Scheltens, Philip and Smets, MA Ellen and van der Flier, M. Wiesje", title="Development and Usability of ADappt: Web-Based Tool to Support Clinicians, Patients, and Caregivers in the Diagnosis of Mild Cognitive Impairment and Alzheimer Disease", journal="JMIR Form Res", year="2019", month="Jul", day="08", volume="3", number="3", pages="e13417", keywords="Alzheimer's disease", keywords="biomarkers", keywords="decision aids", keywords="mild cognitive impairment", keywords="precision medicine", keywords="risk", keywords="shared decision making", abstract="Background: As a result of advances in diagnostic testing in the field of Alzheimer disease (AD), patients are diagnosed in earlier stages of the disease, for example, in the stage of mild cognitive impairment (MCI). This poses novel challenges for a clinician during the diagnostic workup with regard to diagnostic testing itself, namely, which tests are to be performed, but also on how to engage patients in this decision and how to communicate test results. As a result, tools to support decision making and improve risk communication could be valuable for clinicians and patients. 
Objective: The aim of this study was to present the design, development, and testing of a Web-based tool for clinicians in a memory clinic setting and to ascertain whether this tool can (1) facilitate the interpretation of biomarker results in individual patients with MCI regarding their risk of progression to dementia, (2) support clinicians in communicating biomarker test results and risks to MCI patients and their caregivers, and (3) support clinicians in a process of shared decision making regarding the diagnostic workup of AD. Methods: A multiphase mixed-methods approach was used. Phase 1 consisted of a qualitative needs assessment among professionals, patients, and caregivers; phase 2, consisted of an iterative process of development and the design of the tool (ADappt); and phase 3 consisted of a quantitative and qualitative assessment of usability and acceptability of ADappt. Across these phases, co-creation was realized via a user-centered qualitative approach with clinicians, patients, and caregivers. Results: In phase 1, clinicians indicated the need for risk calculation tools and visual aids to communicate test results to patients. Patients and caregivers expressed their needs for more specific information on their risk for developing AD and related consequences. In phase 2, we developed the content and graphical design of ADappt encompassing 3 modules: a risk calculation tool, a risk communication tool including a summary sheet for patients and caregivers, and a conversation starter to support shared decision making regarding the diagnostic workup. In phase 3, ADappt was considered to be clear and user-friendly. 
Conclusions: Clinicians in a memory clinic setting can use ADappt, a Web-based tool, developed using multiphase design and co-creation, for support that includes an individually tailored interpretation of biomarker test results, communication of test results and risks to patients and their caregivers, and shared decision making on diagnostic testing. ", doi="10.2196/13417", url="https://formative.jmir.org/2019/3/e13417/", url="http://www.ncbi.nlm.nih.gov/pubmed/31287061" } @Article{info:doi/10.2196/12554, author="Cacheda, Fidel and Fernandez, Diego and Novoa, J. Francisco and Carneiro, Victor", title="Early Detection of Depression: Social Network Analysis and Random Forest Techniques", journal="J Med Internet Res", year="2019", month="Jun", day="10", volume="21", number="6", pages="e12554", keywords="depression", keywords="major depressive disorder", keywords="social media", keywords="artificial intelligence", keywords="machine learning", abstract="Background: Major depressive disorder (MDD) or depression is among the most prevalent psychiatric disorders, affecting more than 300 million people globally. Early detection is critical for rapid intervention, which can potentially reduce the escalation of the disorder. Objective: This study used data from social media networks to explore various methods of early detection of MDDs based on machine learning. We performed a thorough analysis of the dataset to characterize the subjects' behavior based on different aspects of their writings: textual spreading, time gap, and time span. Methods: We proposed 2 different approaches based on machine learning singleton and dual. The former uses 1 random forest (RF) classifier with 2 threshold functions, whereas the latter uses 2 independent RF classifiers, one to detect depressed subjects and another to identify nondepressed individuals. In both cases, features are defined from textual, semantic, and writing similarities. 
Results: The evaluation follows a time-aware approach that rewards early detections and penalizes late detections. The results show how a dual model performs significantly better than the singleton model and is able to improve current state-of-the-art detection models by more than 10\%. Conclusions: Given the results, we consider that this study can help in the development of new solutions to deal with the early detection of depression on social networks. ", doi="10.2196/12554", url="http://www.jmir.org/2019/6/e12554/", url="http://www.ncbi.nlm.nih.gov/pubmed/31199323" } @Article{info:doi/10.2196/11548, author="Karystianis, George and Adily, Armita and Schofield, Peter and Knight, Lee and Galdon, Clara and Greenberg, David and Jorm, Louisa and Nenadic, Goran and Butler, Tony", title="Automatic Extraction of Mental Health Disorders From Domestic Violence Police Narratives: Text Mining Study", journal="J Med Internet Res", year="2018", month="Sep", day="13", volume="20", number="9", pages="e11548", keywords="text mining", keywords="rule-based approach", keywords="police narratives", keywords="mental health disorders", keywords="domestic violence", abstract="Background: Vast numbers of domestic violence (DV) incidents are attended by the New South Wales Police Force each year in New South Wales and recorded as both structured quantitative data and unstructured free text in the WebCOPS (Web-based interface for the Computerised Operational Policing System) database regarding the details of the incident, the victim, and person of interest (POI). Although the structured data are used for reporting purposes, the free text remains untapped for DV reporting and surveillance purposes. Objective: In this paper, we explore whether text mining can automatically identify mental health disorders from this unstructured text. 
Methods: We used a training set of 200 DV recorded events to design a knowledge-driven approach based on lexical patterns in text suggesting mental health disorders for POIs and victims. Results: The precision returned from an evaluation set of 100 DV events was 97.5\% and 87.1\% for mental health disorders related to POIs and victims, respectively. After applying our approach to a large-scale corpus of almost a half million DV events, we identified 77,995 events (15.83\%) that mentioned mental health disorders, with 76.96\% (60,032/77,995) of those linked to POIs versus 16.47\% (12,852/77,995) for the victims and 6.55\% (5111/77,995) for both. Depression was the most common mental health disorder mentioned in both victims (22.25\%, 3269) and POIs (18.70\%, 8944), followed by alcohol abuse for POIs (12.19\%, 5829) and various anxiety disorders (eg, panic disorder, generalized anxiety disorder) for victims (11.66\%, 1714). Conclusions: The results suggest that text mining can automatically extract targeted information from police-recorded DV events to support further public health research into the nexus between mental health disorders and DV. ", doi="10.2196/11548", url="http://www.jmir.org/2018/9/e11548/", url="http://www.ncbi.nlm.nih.gov/pubmed/30213778" } @Article{info:doi/10.2196/jmir.9966, author="Ospina-Pinillos, Laura and Davenport, Tracey and Iorfino, Frank and Tickell, Ashleigh and Cross, Shane and Scott, M. Elizabeth and Hickie, B. 
Ian", title="Using New and Innovative Technologies to Assess Clinical Stage in Early Intervention Youth Mental Health Services: Evaluation Study", journal="J Med Internet Res", year="2018", month="Sep", day="10", volume="20", number="9", pages="e259", keywords="staging model", keywords="mental health", keywords="primary health care", keywords="telemedicine", keywords="symptom assessment health service reform", abstract="Background: Globally there is increasing recognition that new strategies are required to reduce disability due to common mental health problems. As 75\% of mental health and substance use disorders emerge during the teenage or early adulthood years, these strategies need to be readily accessible to young people. When considering how to provide such services at scale, new and innovative technologies show promise in augmenting traditional clinic-based services. Objective: The aim of this study was to test new and innovative technologies to assess clinical stage in early intervention youth mental health services using a prototypic online system known as the Mental Health eClinic (MHeC). Methods: The online assessment within the MHeC was compared directly against traditional clinician assessment within 2 Sydney-based youth-specific mental health services (headspace Camperdown and headspace Campbelltown). A total of 204 young people were recruited to the study. Eligible participants completed both face-to-face and online assessments, which were randomly allocated and counterbalanced at a 1-to-3 ratio. These assessments were (1) a traditional 45- to 60-minute headspace face-to-face assessment performed by a Youth Access Clinician and (2) an approximate 60-minute online assessment (including a self-report Web-based survey, immediate dashboard of results, and a video visit with a clinician). All assessments were completed within a 2-week timeframe from initial presentation. 
Results: Of the 72 participants who completed the study, 71\% (51/72) were female and the mean age was 20.4 years (aged 16 to 25 years); 68\% (49/72) of participants were recruited from headspace Camperdown and the remaining 32\% (23/72) from headspace Campbelltown. Interrater agreement of participants' stage, as determined after face-to-face assessment or online assessment, demonstrated fair agreement (kappa=.39, P<.001) with concordance in 68\% of cases (49/72). Among the discordant cases, those who were allocated to a higher stage by online raters were more likely to report a past history of mental health disorders (P=.001), previous suicide planning (P=.002), and current cannabis misuse (P=.03) compared to those allocated to a lower stage. Conclusions: The MHeC presents a new and innovative method for determining key clinical service parameters. It has the potential to be adapted to varied settings in which young people are connected with traditional clinical services and assist in providing the right care at the right time. ", doi="10.2196/jmir.9966", url="http://www.jmir.org/2018/9/e259/", url="http://www.ncbi.nlm.nih.gov/pubmed/30201602" } @Article{info:doi/10.2196/10104, author="Heraz, Alicia and Clynes, Manfred", title="Recognition of Emotions Conveyed by Touch Through Force-Sensitive Screens: Observational Study of Humans and Machine Learning Techniques", journal="JMIR Ment Health", year="2018", month="Aug", day="30", volume="5", number="3", pages="e10104", keywords="emotional artificial intelligence", keywords="human-computer interaction", keywords="smartphone", keywords="force-sensitive screens", keywords="mental health", keywords="positive computing", keywords="artificial intelligence", keywords="emotions", keywords="emotional intelligence", abstract="Background: Emotions affect our mental health: they influence our perception, alter our physical strength, and interfere with our reason. Emotions modulate our face, voice, and movements. 
When emotions are expressed through the voice or face, they are difficult to measure because cameras and microphones are not often used in real life in the same laboratory conditions where emotion detection algorithms perform well. With the increasing use of smartphones, the fact that we touch our phones, on average, thousands of times a day, and that emotions modulate our movements, we have an opportunity to explore emotional patterns in passive expressive touches and detect emotions, enabling us to empower smartphone apps with emotional intelligence. Objective: In this study, we asked 2 questions. (1) As emotions modulate our finger movements, will humans be able to recognize emotions by only looking at passive expressive touches? (2) Can we teach machines how to accurately recognize emotions from passive expressive touches? Methods: We were interested in 8 emotions: anger, awe, desire, fear, hate, grief, laughter, love (and no emotion). We conducted 2 experiments with 2 groups of participants: good imagers and emotionally aware participants formed group A, with the remainder forming group B. In the first experiment, we video recorded, for a few seconds, the expressive touches of group A, and we asked group B to guess the emotion of every expressive touch. In the second experiment, we trained group A to express every emotion on a force-sensitive smartphone. We then collected hundreds of thousands of their touches, and applied feature selection and machine learning techniques to detect emotions from the coordinates of participants' finger touches, amount of force, and skin area, all as functions of time. Results: We recruited 117 volunteers: 15 were good imagers and emotionally aware (group A); the other 102 participants formed group B.
In the first experiment, group B was able to successfully recognize all emotions (and no emotion) with a high 83.8\% (769/918) accuracy: 49.0\% (50/102) of them were 100\% (450/450) correct and 25.5\% (26/102) were 77.8\% (182/234) correct. In the second experiment, we achieved a high 91.11\% (2110/2316) classification accuracy in detecting all emotions (and no emotion) from 9 spatiotemporal features of group A touches. Conclusions: Emotions modulate our touches on force-sensitive screens, and humans have a natural ability to recognize other people's emotions by watching prerecorded videos of their expressive touches. Machines can learn the same emotion recognition ability and do better than humans if they are allowed to continue learning on new data. It is possible to enable force-sensitive screens to recognize users' emotions and share this emotional insight with users, increasing users' emotional awareness and allowing researchers to design better technologies for well-being. ", doi="10.2196/10104", url="http://mental.jmir.org/2018/3/e10104/", url="http://www.ncbi.nlm.nih.gov/pubmed/30166276" } @Article{info:doi/10.2196/10130, author="Pratap, Abhishek and Renn, N. Brenna and Volponi, Joshua and Mooney, D. Sean and Gazzaley, Adam and Arean, A. Patricia and Anguera, A. Joaquin", title="Using Mobile Apps to Assess and Treat Depression in Hispanic and Latino Populations: Fully Remote Randomized Clinical Trial", journal="J Med Internet Res", year="2018", month="Aug", day="09", volume="20", number="8", pages="e10130", keywords="mobile apps", keywords="smartphone", keywords="depression", keywords="Hispanics", keywords="Latinos", keywords="clinical trial", keywords="cognition", keywords="problem solving", keywords="mHealth", keywords="minority groups", abstract="Background: Most people with mental health disorders fail to receive timely access to adequate care. 
US Hispanic/Latino individuals are particularly underrepresented in mental health care and are historically a very difficult population to recruit into clinical trials; however, they have increasing access to mobile technology, with over 75\% owning a smartphone. This technology has the potential to overcome known barriers to accessing and utilizing traditional assessment and treatment approaches. Objective: This study aimed to compare recruitment and engagement in a fully remote trial of individuals with depression who either self-identify as Hispanic/Latino or not. A secondary aim was to assess treatment outcomes in these individuals using three different self-guided mobile apps: iPST (based on evidence-based therapeutic principles from problem-solving therapy, PST), Project Evolution (EVO; a cognitive training app based on cognitive neuroscience principles), and health tips (a health information app that served as an information control). Methods: We recruited Spanish and English speaking participants through social media platforms, internet-based advertisements, and traditional fliers in select locations in each state across the United States. Assessment and self-guided treatment was conducted on each participant's smartphone or tablet. We enrolled 389 Hispanic/Latino and 637 non-Hispanic/Latino adults with mild to moderate depression as determined by Patient Health Questionnaire-9 (PHQ-9) score$\geq$5 or related functional impairment. Participants were first asked about their preferences among the three apps and then randomized to their top two choices. Outcomes were depressive symptom severity (measured using PHQ-9) and functional impairment (assessed with Sheehan Disability Scale), collected over 3 months. Engagement in the study was assessed based on the number of times participants completed active surveys.
Results: We screened 4502 participants and enrolled 1040 participants from throughout the United States over 6 months, yielding a sample of 348 active users. Long-term engagement surfaced as a key issue among Hispanic/Latino participants, who dropped from the study 2 weeks earlier than their non-Hispanic/Latino counterparts (P<.02). No significant differences were observed for treatment outcomes between those identifying as Hispanic/Latino or not. Although depressive symptoms improved (beta=--2.66, P=.006) over the treatment course, outcomes did not vary by treatment app. Conclusions: Fully remote mobile-based studies can attract a diverse participant pool including people from traditionally underserved communities in mental health care and research (here, Hispanic/Latino individuals). However, keeping participants engaged in this type of ``low-touch'' research study remains challenging. Hispanic/Latino populations may be less willing to use mobile apps for assessing and managing depression. Future research endeavors should use a user-centered design to determine the role of mobile apps in the assessment and treatment of depression for this population, app features they would be interested in using, and strategies for long-term engagement. Trial Registration: Clinicaltrials.gov NCT01808976; https://clinicaltrials.gov/ct2/show/NCT01808976 (Archived by WebCite at http://www.webcitation.org/70xI3ILkz) ", doi="10.2196/10130", url="http://www.jmir.org/2018/8/e10130/", url="http://www.ncbi.nlm.nih.gov/pubmed/30093372" } @Article{info:doi/10.2196/11143, author="Bott, Nicholas and Madero, N. Erica and Glenn, Jordan and Lange, Alexander and Anderson, John and Newton, Doug and Brennan, Adam and Buffalo, A. 
Elizabeth and Rentz, Dorene and Zola, Stuart", title="Device-Embedded Cameras for Eye Tracking--Based Cognitive Assessment: Validation With Paper-Pencil and Computerized Cognitive Composites", journal="J Med Internet Res", year="2018", month="Jul", day="24", volume="20", number="7", pages="e11143", keywords="eye tracking", keywords="visual paired comparison", keywords="preclinical Alzheimer's disease", keywords="neuropsychological testing", abstract="Background: As eye tracking-based assessment of cognition becomes more widely used in older adults, particularly those at risk for dementia, reliable and scalable methods to collect high-quality data are required. Eye tracking-based cognitive tests that utilize device-embedded cameras have the potential to reach large numbers of people as a screening tool for preclinical cognitive decline. However, to fully validate this approach, more empirical evidence about the comparability of eye tracking-based paradigms to existing cognitive batteries is needed. Objective: Using a population of clinically normal older adults, we examined the relationship between a 30-minute Visual Paired Comparison (VPC) recognition memory task and cognitive composite indices sensitive to a subtle decline in domains associated with Alzheimer disease. Additionally, the scoring accuracy between software used with a commercial grade eye tracking camera at 60 frames per second (FPS) and a manually scored procedure used with a laptop-embedded web camera (3 FPS) on the VPC task was compared, as well as the relationship between VPC task performance and domain-specific cognitive function. Methods: A group of 49 clinically normal older adults completed a 30-min VPC recognition memory task with simultaneous recording of eye movements by a commercial-grade eye-tracking camera and a laptop-embedded camera.
Relationships between webcam VPC performance and the Preclinical Alzheimer Cognitive Composite (PACC) and National Institutes of Health Toolbox Cognitive Battery (NIHTB-CB) were examined. Inter-rater reliability for manually scored tests was analyzed using Krippendorff's kappa formula, and we used Spearman's Rho correlations to investigate the relationship between VPC performance scores with both cameras. We also examined the relationship between VPC performance with the device-embedded camera and domain-specific cognitive performance. Results: Modest relationships were seen between mean VPC novelty preference and the PACC (r=.39, P=.007) and NIHTB-CB (r=.35, P=.03) composite scores, and additional individual neurocognitive task scores including letter fluency (r=.33, P=.02), category fluency (r=.36, P=.01), and Trail Making Test A (r=--.40, P=.006). Robust relationships were observed between the 60 FPS eye tracker and 3 FPS webcam on both trial-level VPC novelty preference (r=.82, P<.001) and overall mean VPC novelty preference (r=.92, P<.001). Inter-rater agreement of manually scored web camera data was high (kappa=.84). Conclusions: In a sample of clinically normal older adults, performance on a 30-minute VPC task correlated modestly with computerized and paper-pencil based cognitive composites that serve as preclinical Alzheimer disease cognitive indices. The strength of these relationships did not differ between camera devices. We suggest that using a device-embedded camera is a reliable and valid way to assess performance on VPC tasks accurately and that these tasks correlate with existing cognitive composites. ", doi="10.2196/11143", url="http://www.jmir.org/2018/7/e11143/", url="http://www.ncbi.nlm.nih.gov/pubmed/30042093" } @Article{info:doi/10.2196/jmir.9775, author="Zulueta, John and Piscitello, Andrea and Rasic, Mladen and Easter, Rebecca and Babu, Pallavi and Langenecker, A. Scott and McInnis, Melvin and Ajilore, Olusola and Nelson, C. 
Peter and Ryan, Kelly and Leow, Alex", title="Predicting Mood Disturbance Severity with Mobile Phone Keystroke Metadata: A BiAffect Digital Phenotyping Study", journal="J Med Internet Res", year="2018", month="Jul", day="20", volume="20", number="7", pages="e241", keywords="digital phenotype", keywords="mHealth", keywords="ecological momentary assessment", keywords="keystroke dynamics", keywords="bipolar disorder", keywords="depression", keywords="mania", keywords="mobile phone", abstract="Background: Mood disorders are common and associated with significant morbidity and mortality. Better tools are needed for their diagnosis and treatment. Deeper phenotypic understanding of these disorders is integral to the development of such tools. This study is the first effort to use passively collected mobile phone keyboard activity to build deep digital phenotypes of depression and mania. Objective: The objective of our study was to investigate the relationship between mobile phone keyboard activity and mood disturbance in subjects with bipolar disorders and to demonstrate the feasibility of using passively collected mobile phone keyboard metadata features to predict manic and depressive signs and symptoms as measured via clinician-administered rating scales. Methods: Using a within-subject design of 8 weeks, subjects were provided a mobile phone loaded with a customized keyboard that passively collected keystroke metadata. Subjects were administered the Hamilton Depression Rating Scale (HDRS) and Young Mania Rating Scale (YMRS) weekly. Linear mixed-effects models were created to predict HDRS and YMRS scores. The total number of keystrokes was 626,641, with a weekly average of 9791 (7861), and that of accelerometer readings was 6,660,890, with a weekly average 104,076 (68,912). Results: A statistically significant mixed-effects regression model for the prediction of HDRS-17 item scores was created: conditional R2=.63, P=.01. 
A mixed-effects regression model for YMRS scores showed the variance accounted for by random effect was zero, and so an ordinary least squares linear regression model was created: R2=.34, P=.001. Multiple significant variables were demonstrated for each measure. Conclusions: Mood states in bipolar disorder appear to correlate with specific changes in mobile phone usage. The creation of these models provides evidence for the feasibility of using passively collected keyboard metadata to detect and monitor mood disturbances. ", doi="10.2196/jmir.9775", url="http://www.jmir.org/2018/7/e241/", url="http://www.ncbi.nlm.nih.gov/pubmed/30030209" } @Article{info:doi/10.2196/10144, author="DelPozo-Banos, Marcos and John, Ann and Petkov, Nicolai and Berridge, Mark Damon and Southern, Kate and Lloyd, Keith and Jones, Caroline and Spencer, Sarah and Travieso, Manuel Carlos", title="Using Neural Networks with Routine Health Records to Identify Suicide Risk: Feasibility Study", journal="JMIR Ment Health", year="2018", month="Jun", day="22", volume="5", number="2", pages="e10144", keywords="suicide prevention", keywords="risk assessment", keywords="electronic health records", keywords="routine data", keywords="machine learning", keywords="artificial neural networks", abstract="Background: Each year, approximately 800,000 people die by suicide worldwide, accounting for 1--2 in every 100 deaths. It is always a tragic event with a huge impact on family, friends, the community and health professionals. Unfortunately, suicide prevention and the development of risk assessment tools have been hindered by the complexity of the underlying mechanisms and the dynamic nature of a person's motivation and intent. Many of those who die by suicide had contact with health services in the preceding year but identifying those most at risk remains a challenge. 
Objective: To explore the feasibility of using artificial neural networks with routinely collected electronic health records to support the identification of those at high risk of suicide when in contact with health services. Methods: Using the Secure Anonymised Information Linkage Databank UK, we extracted the data of those who died by suicide between 2001 and 2015 and paired controls. Looking at primary (general practice) and secondary (hospital admissions) electronic health records, we built a binary feature vector coding the presence of risk factors at different times prior to death. Risk factors included: general practice contact and hospital admission; diagnosis of mental health issues; injury and poisoning; substance misuse; maltreatment; sleep disorders; and the prescription of opiates and psychotropics. Basic artificial neural networks were trained to differentiate between the suicide cases and paired controls. We interpreted the output score as the estimated suicide risk. System performance was assessed with 10x10-fold repeated cross-validation, and its behavior was studied by representing the distribution of estimated risk across the cases and controls, and the distribution of factors across estimated risks. Results: We extracted a total of 2604 suicide cases and 20 paired controls per case. Our best system attained a mean error rate of 26.78\% (SD 1.46; 64.57\% of sensitivity and 81.86\% of specificity). While the distribution of controls was concentrated around estimated risks < 0.5, cases were almost uniformly distributed between 0 and 1. Prescription of psychotropics, depression and anxiety, and self-harm increased the estimated risk by {\textasciitilde}0.4. At least 95\% of those presenting these factors were identified as suicide cases. Conclusions: Despite the simplicity of the implemented system, the proposed methodology obtained an accuracy like other published methods based on specialized questionnaire generated data. 
Most of the errors came from the heterogeneity of patterns shown by suicide cases, some of which were identical to those of the paired controls. Prescription of psychotropics, depression and anxiety, and self-harm were strongly linked with higher estimated risk scores, followed by hospital admission and long-term drug and alcohol misuse. Other risk factors like sleep disorders and maltreatment had more complex effects. ", doi="10.2196/10144", url="http://mental.jmir.org/2018/2/e10144/", url="http://www.ncbi.nlm.nih.gov/pubmed/29934287" } @Article{info:doi/10.2196/mental.9806, author="Wildemeersch, Davina and Bernaerts, Lisa and D'Hondt, Michiel and Hans, Guy", title="Preliminary Evaluation of a Web-Based Psychological Screening Tool in Adolescents Undergoing Minimally Invasive Pectus Surgery: Single-Center Observational Cohort Study", journal="JMIR Ment Health", year="2018", month="May", day="31", volume="5", number="2", pages="e45", keywords="mental health", keywords="telemedicine", keywords="pectus carinatum", keywords="funnel chest", abstract="Background: Preoperative anxiety and depression are predominant risk factors for increased postoperative pain. Thoracic wall deformities in adolescents often cause low self-esteem, which contributes to psychological concerns. Several studies have suggested a relationship between preoperative mental health support and enhanced recovery after surgery. Objective: This study investigated the validity of screening questionnaires concerning psychological trait and state characteristics via a patient-specific online platform. Methods: Patients scheduled for elective pectus surgery between June 2017 and August 2017 were invited to participate in clinical interviews and online self-report questionnaires. All patients were recruited in the Anesthesiology Department, Antwerp University Hospital, Belgium. 
This single-center observational cohort study was performed in accordance with the ethical standards of the International Council for Harmonisation--Good Clinical Practice guidelines and the Declaration of Helsinki after obtaining study approval by the Institutional Review Board and Ethics Committee of the Antwerp University Hospital, Belgium (study identifier: 17/08/082). An online preoperative psychological inventory was performed using the Rosenberg Self-Esteem Scale, Hospital Anxiety and Depression Scale, and State-Trait Anxiety Inventory. Postoperatively, pain intensity and interference were assessed using the Multidisciplinary Pain Inventory, Coping With Pain Questionnaire, and numeric pain rating scale assessment. Patient satisfaction of the Web-based platform was evaluated. Results: A total of 21 adolescent patients used our Web-based psychological perioperative screening platform. Patients rated the mobile phone app, usability, and accessibility of the digital platform as good or excellent in 85\% (17/20), 89\% (17/19), and 95\% (20/21) of the cases, respectively. A total of 89\% (17/19) of the patients rated the effort of generating answers to the online questionnaires as low. The results from the completed questionnaires indicated a strong negative correlation between self-esteem and the anxiety trait (R=--0.72, P<.001) and overall anxiety characteristics (R=--0.49, P=.04). There was a positive correlation between depressive and anxiety characteristics and the anxiety trait (R=0.52, P=.03 and R=0.6, P=.02, respectively) measured by the online self-report questionnaires. Moreover, preoperative anxiety was positively correlated with postoperative pain interference (R=0.58, P=.02). Finally, there was a negative correlation between self-esteem and pain interference (R=--0.62, P=.01). 
Conclusions: Perioperative screening of psychological symptoms and trait characteristics with specific treatment, if necessary, could further improve postoperative pain and overall health status. Research on eHealth technology, even for psychological patient care, is rapidly increasing. Trial Registration: ClinicalTrials.gov NCT03100669; https://clinicaltrials.gov/ct2/show/NCT03100669 (Archived by WebCite at http://www.webcitation.org/6zPvHDhU5) ", doi="10.2196/mental.9806", url="http://mental.jmir.org/2018/2/e45/" } @Article{info:doi/10.2196/jmir.9298, author="Feenstra, EM Heleen and Vermeulen, E. Ivar and Murre, MJ Jaap and Schagen, B. Sanne", title="Online Self-Administered Cognitive Testing Using the Amsterdam Cognition Scan: Establishing Psychometric Properties and Normative Data", journal="J Med Internet Res", year="2018", month="May", day="30", volume="20", number="5", pages="e192", keywords="cognition", keywords="neuropsychological tests", keywords="self-assessment", keywords="internet", keywords="reproducibility of results", keywords="reference standards", abstract="Background: Online tests enable efficient self-administered assessments and consequently facilitate large-scale data collection for many fields of research. The Amsterdam Cognition Scan is a new online neuropsychological test battery that measures a broad variety of cognitive functions. Objective: The aims of this study were to evaluate the psychometric properties of the Amsterdam Cognition Scan and to establish regression-based normative data. Methods: The Amsterdam Cognition Scan was self-administrated twice from home---with an interval of 6 weeks---by 248 healthy Dutch-speaking adults aged 18 to 81 years. Results: Test-retest reliability was moderate to high and comparable with that of equivalent traditional tests (intraclass correlation coefficients: .45 to .80; .83 for the Amsterdam Cognition Scan total score). 
Multiple regression analyses indicated that (1) participants' age negatively influenced all (12) cognitive measures, (2) gender was associated with performance on six measures, and (3) education level was positively associated with performance on four measures. In addition, we observed influences of tested computer skills and of self-reported amount of computer use on cognitive performance. Demographic characteristics that proved to influence Amsterdam Cognition Scan test performance were included in regression-based predictive formulas to establish demographically adjusted normative data. Conclusions: Initial results from a healthy adult sample indicate that the Amsterdam Cognition Scan has high usability and can give reliable measures of various generic cognitive ability areas. For future use, the influence of computer skills and experience should be further studied, and for repeated measurements, computer configuration should be consistent. The reported normative data allow for initial interpretation of Amsterdam Cognition Scan performances. ", doi="10.2196/jmir.9298", url="http://www.jmir.org/2018/5/e192/" } @Article{info:doi/10.2196/10650, author="Hamamura, Toshitaka and Suganuma, Shinichiro and Takano, Ayumi and Matsumoto, Toshihiko and Shimoyama, Haruhiko", title="The Efficacy of a Web-Based Screening and Brief Intervention for Reducing Alcohol Consumption Among Japanese Problem Drinkers: Protocol for a Single-Blind Randomized Controlled Trial", journal="JMIR Res Protoc", year="2018", month="May", day="30", volume="7", number="5", pages="e10650", keywords="problem drinking", keywords="Web-based interventions", keywords="personalized normative feedback", keywords="Japanese drinkers", keywords="randomized controlled trial", abstract="Background: The literature shows that computer-delivered interventions with personalized normative feedback can reduce problem drinking for up to 6 months in the West. 
Meanwhile, no studies have been conducted to examine the effects of such interventions among Japanese problem drinkers. Possible moderators associated with effectiveness of the intervention need to be also explored. Objective: The purpose of this study is to conduct a trial and examine the efficacy of a brief intervention with personal normative feedback and psychoeducation on several measures of alcohol consumption among Japanese problem drinkers. Additionally, this study will examine whether the level of alcohol use disorder and beliefs about the physical and psychological outcomes of drinking moderate the effect of the intervention on outcome measures. Methods: This study will conduct a single-blind, 2-armed randomized controlled trial. Japanese adults with an Alcohol Use Disorder Identification Test score of 8 or higher will be enrolled in the trial. Participants allocated to the intervention group will receive the intervention immediately after the baseline measurements, and participants allocated to the waitlist group will receive the intervention at the end of the trial. Outcome measures include drinking quantity, drinking frequency, and alcohol-related consequences. Follow-up assessment will take place at 1 month, 2 months, and 6 months following the baseline measurement. The authors will not know the group allocation during trial. The authors will plan to collect a sample of 600 participants. Mixed-effect analyses of variance will be used to examine the main effects of condition, the main effects of time, and the interaction effects between condition and time on outcome variables. Results: Enrollment for the trial began on January 6, 2018 and data are expected to be available by August 2018. Conclusions: This study will contribute to the literature by demonstrating the efficacy of Web-based screenings and brief interventions among Japanese problem drinkers and indicating several possible moderators between the intervention and outcomes. 
This type of Web-based brief intervention has the possibility of being implemented in Japanese schools and workplaces as a prevention tool. Trial Registration: UMIN Clinical Trials Registry R000034388; https://upload.umin.ac.jp/cgi-open-bin/ctr\_e/ctr\_view.cgi?recptno=R000034388 (Archived by WebCite at http://www.webcitation.org/6xmOoTfTI) Registered Report Identifier: RR1-10.2196/10650 ", doi="10.2196/10650", url="http://www.researchprotocols.org/2018/5/e10650/" } @Article{info:doi/10.2196/mental.9488, author="van Bebber, Jan and Meijer, R. Rob and Wigman, TW Johana and Sytema, Sjoerd and Wunderink, Lex", title="A Smart Screening Device for Patients with Mental Health Problems in Primary Health Care: Development and Pilot Study", journal="JMIR Ment Health", year="2018", month="May", day="28", volume="5", number="2", pages="e41", keywords="screening", keywords="primary health care", keywords="psychopathology", keywords="triage", abstract="Background: Adequate recognition of mental health problems is a prerequisite for successful treatment. Although most people tend to consult their general practitioner (GP) when they first experience mental health problems, GPs are not very well equipped to screen for various forms of psychopathology to help them determine clients' need for treatment. Objective: In this paper, the development and characteristics of CATja, a computerized adaptive test battery built to facilitate triage in primary care settings, are described, and first results of its implementation are reported. Methods: CATja was developed in close collaboration with GPs and mental health assistants (MHAs). During implementation, MHAs were requested to appraise clients' rankings (N=91) on the domains to be tested and to indicate the treatment level they deemed most appropriate for clients before test administration. 
We compared the agreement between domain score appraisals and domain score computed by CATja and the agreement between initial (before test administration) treatment level advice and final treatment level advice. Results: Agreements (Cohen kappas) between MHAs' appraisals of clients' scores and clients' scores computed by CATja were mostly between .40 and .50 (Cohen kappas=.10-.20), and the agreement between ``initial'' treatment levels and the final treatment level advised was .65 (Cohen kappa=.55). Conclusions: Using CATja, caregivers can efficiently generate summaries of their clients' mental well-being on which decisions about treatment type and care level may be based. Further validation research is needed. ", doi="10.2196/mental.9488", url="http://mental.jmir.org/2018/2/e41/" } @Article{info:doi/10.2196/mental.9768, author="Maunder, G. Robert and Hunter, J. Jonathan", title="An Internet Resource for Self-Assessment of Mental Health and Health Behavior: Development and Implementation of the Self-Assessment Kiosk", journal="JMIR Ment Health", year="2018", month="May", day="16", volume="5", number="2", pages="e39", keywords="self-assessment", keywords="feedback", keywords="surveys and questionnaires", keywords="internet", abstract="Background: Standardized measurement of physical and mental health is useful for identification of health problems. Personalized feedback of the results can influence health behavior, and treatment outcomes can be improved by monitoring feedback over time. However, few resources are available that are free for users, provide feedback from validated measurement instruments, and measure a wide range of health domains. Objective: This study aimed to develop an internet self-assessment resource that fills the identified gap and collects data to generate and test hypotheses about health, to test its feasibility, and to describe the characteristics of its users. 
Methods: The Self-Assessment Kiosk was built using validated health measurement instruments and implemented on a commercial internet survey platform. Data regarding usage and the characteristics of users were collected over 54 weeks. The rate of accrual of new users, popularity of measurement domains, frequency with which multiple domains were selected for measurement, and characteristics of users who chose particular questionnaires were assessed. Results: Of the 1435 visits, 441 (30.73\%) were visiting for the first time, completed at least 1 measure, indicated that their responses were truthful, and consented to research. Growth in the number of users over time was approximately linear. Users were skewed toward old age and higher income and education. Most (53.9\%, 234/434) reported at least 1 medical condition. The median number of questionnaires completed was 5. Internal reliability of most measures was good (Cronbach alpha>.70), with lower reliability for some subscales of coping (self-distraction alpha=.35, venting alpha=.50, acceptance alpha=.51) and personality (agreeableness alpha=.46, openness alpha=.45). The popular questionnaires measured depression (61.0\%, 269/441), anxiety (60.5\%, 267/441), attachment insecurity (54.2\%, 239/441), and coping (46.0\%, 203/441). Demographic characteristics somewhat influenced choice of instruments, accounting for <9\% of the variance in this choice. Mean depression and anxiety scores were intermediate between previously studied populations with and without mental illness. Modeling to estimate the sample size required to study relationships between variables suggested that the accrual of users required to study the relationship between 3 variables was 2 to 3 times greater than that required to study a single variable. Conclusions: The value of the Self-Assessment Kiosk to users and the feasibility of providing this resource are supported by the steady accumulation of new users over time. 
The Self-Assessment Kiosk database can be interrogated to understand the relationships between health variables. Users who select particular instruments tend to have scores that are higher than those found in the general population, indicating that instruments are more likely to be selected when they are salient. Self-selection bias limits generalizability and needs to be taken into account when using the Self-Assessment Kiosk database for research. Ethical issues that were considered in developing and implementing the Self-Assessment Kiosk are discussed. ", doi="10.2196/mental.9768", url="http://mental.jmir.org/2018/2/e39/", url="http://www.ncbi.nlm.nih.gov/pubmed/29769171" } @Article{info:doi/10.2196/mhealth.9456, author="March, Sonja and Day, Jamin and Zieschank, Kirsty and Ireland, Michael", title="The Interactive Child Distress Screener: Development and Preliminary Feasibility Testing", journal="JMIR Mhealth Uhealth", year="2018", month="Apr", day="19", volume="6", number="4", pages="e90", keywords="child", keywords="preschool", keywords="mental health", keywords="symptom assessment", keywords="self-assessment (psychology)", abstract="Background: Early identification of child emotional and behavioral concerns is essential for the prevention of mental health problems; however, few suitable child-reported screening measures are available. Digital tools offer an exciting opportunity for obtaining clinical information from the child's perspective. Objective: The aim of this study was to describe the initial development and pilot testing of the Interactive Child Distress Screener (ICDS). The ICDS is a Web-based screening instrument for the early identification of emotional and behavioral problems in children aged between 5 and 12 years. 
Methods: This paper utilized a mixed-methods approach to (1) develop and refine item content using an expert review process (study 1) and (2) develop and refine prototype animations and an app interface using codesign with child users (study 2). Study 1 involved an iterative process that comprised the following four steps: (1) the initial development of target constructs, (2) preliminary content validation (face validity, item importance, and suitability for animation) from an expert panel of researchers and psychologists (N=9), (3) item refinement, and (4) a follow-up validation with the same expert panel. Study 2 also comprised four steps, which are as follows: (1) the development of prototype animations, (2) the development of the app interface and a response format, (3) child interviews to determine feasibility and obtain feedback, and (4) refinement of animations and interface. Cognitive interviews were conducted with 18 children aged between 4 and 12 years who tested 3 prototype animated items. Children were asked to describe the target behavior, how well the animations captured the intended behavior, and provide suggestions for improvement. Their ability to understand the wording of instructions was also assessed, as well as the general acceptability of character and sound design. Results: In study 1, a revised list of 15 constructs was generated from the first and second round of expert feedback. These were rated highly in terms of importance (mean 6.32, SD 0.42) and perceived compatibility of items (mean 6.41, SD 0.45) on a 7-point scale. In study 2, overall feedback regarding the character design and sounds was positive. Children's ability to understand intended behaviors varied according to target items, and feedback highlighted key objectives for improvements such as adding contextual cues or improving character detail. These design changes were incorporated through an iterative process, with examples presented. 
Conclusions: The ICDS has potential to obtain clinical information from the child's perspective that may otherwise be overlooked. If effective, the ICDS will provide a quick, engaging, and easy-to-use screener that can be utilized in routine care settings. This project highlights the importance of involving an expert review and user codesign in the development of digital assessment tools for children. ", doi="10.2196/mhealth.9456", url="http://mhealth.jmir.org/2018/4/e90/", url="http://www.ncbi.nlm.nih.gov/pubmed/29674310" } @Article{info:doi/10.2196/mental.9480, author="Choi, Isabella and Milne, N. David and Deady, Mark and Calvo, A. Rafael and Harvey, B. Samuel and Glozier, Nick", title="Impact of Mental Health Screening on Promoting Immediate Online Help-Seeking: Randomized Trial Comparing Normative Versus Humor-Driven Feedback", journal="JMIR Ment Health", year="2018", month="Apr", day="05", volume="5", number="2", pages="e26", keywords="online help-seeking", keywords="screening", keywords="feedback", keywords="randomized trial", keywords="mental health", keywords="resilience", keywords="depression", abstract="Background: Given the widespread availability of mental health screening apps, providing personalized feedback may encourage people at high risk to seek help to manage their symptoms. While apps typically provide personal score feedback only, feedback types that are user-friendly and increase personal relevance may encourage further help-seeking. Objective: The aim of this study was to compare the effects of providing normative and humor-driven feedback on immediate online help-seeking, defined as clicking on a link to an external resource, and to explore demographic predictors that encourage help-seeking. Methods: An online sample of 549 adults were recruited using social media advertisements. 
Participants downloaded a smartphone app known as ``Mindgauge'' which allowed them to screen their mental wellbeing by completing standardized measures on Symptoms (Kessler 6-item Scale), Wellbeing (World Health Organization [Five] Wellbeing Index), and Resilience (Brief Resilience Scale). Participants were randomized to receive normative feedback that compared their scores to a reference group or humor-driven feedback that presented their scores in a relaxed manner. Those who scored in the moderate or poor ranges in any measure were encouraged to seek help by clicking on a link to an external online resource. Results: A total of 318 participants scored poorly on one or more measures and were provided with an external link after being randomized to receive normative or humor-driven feedback. There was no significant difference of feedback type on clicking on the external link across all measures. A larger proportion of participants from the Wellbeing measure (170/274, 62.0\%) clicked on the links than the Resilience (47/179, 26.3\%) or Symptoms (26/75, 34.7\%) measures ($\chi^{2}$=60.35, P<.001). There were no significant demographic factors associated with help-seeking for the Resilience or Wellbeing measures. Participants with a previous episode of poor mental health were less likely than those without such history to click on the external link in the Symptoms measure (P=.003, odds ratio [OR] 0.83, 95\% CI 0.02-0.44), and younger adults were less likely to click on the link compared to older adults across all measures (P=.005, OR 0.44, 95\% CI 0.25-0.78). Conclusions: This pilot study found that there was no difference between normative and humor-driven feedback on promoting immediate clicks to an external resource, suggesting no impact on online help-seeking. Limitations included: lack of personal score control group, limited measures of predictors and potential confounders, and the fact that other forms of professional help-seeking were not assessed. 
Further investigation into other predictors and factors that impact on help-seeking is needed. Trial Registration: Australian New Zealand Clinical Trials Registry ACTRN12616000707460; https://www.anzctr.org.au/Trial/Registration/TrialReview.aspx?id=370187 (Archived by WebCite at http://www.webcitation.org/6y8m8sVxr) ", doi="10.2196/mental.9480", url="http://mental.jmir.org/2018/2/e26/", url="http://www.ncbi.nlm.nih.gov/pubmed/29622528" } @Article{info:doi/10.2196/jmir.9208, author="Diez-Canseco, Francisco and Toyama, Mauricio and Ipince, Alessandra and Perez-Leon, Silvana and Cavero, Victoria and Araya, Ricardo and Miranda, Jaime J.", title="Integration of a Technology-Based Mental Health Screening Program Into Routine Practices of Primary Health Care Services in Peru (The Allillanchu Project): Development and Implementation", journal="J Med Internet Res", year="2018", month="Mar", day="15", volume="20", number="3", pages="e100", keywords="mental health", keywords="mHealth", keywords="SMS", keywords="text messaging", keywords="screening", keywords="mobile health", keywords="health services research", abstract="Background: Despite their high prevalence and significant burden, mental disorders such as depression remain largely underdiagnosed and undertreated. Objective: The aim of the Allillanchu Project was to design, develop, and test an intervention to promote early detection, opportune referral, and access to treatment of patients with mental disorders attending public primary health care (PHC) services in Lima, Peru. Methods: The project had a multiphase design: formative study, development of intervention components, and implementation. The intervention combined three strategies: training of PHC providers (PHCPs), task shifting the detection and referral of mental disorders, and a mobile health (mHealth) component comprising a screening app followed by motivational and reminder short message service (SMS) to identify at-risk patients. 
The intervention was implemented by 22 PHCPs from five health centers, working in antenatal care, tuberculosis, chronic diseases, and HIV or AIDS services. Results: Over a period of 9 weeks, from September 2015 to November 2015, 733 patients were screened by the 22 PHCPs during routine consultations, and 762 screening were completed in total. The chronic diseases (49.9\%, 380/762) and antenatal care services (36.7\%, 280/762) had the higher number of screenings. Time constraints and workload were the main barriers to implementing the screening, whereas the use of technology, training, and supervision of the PHCPs by the research team were identified as facilitators. Of the 733 patients, 21.7\% (159/733) screened positively and were advised to seek specialized care. Out of the 159 patients with a positive screening result, 127 had a follow-up interview, 72.4\% (92/127) reported seeking specialized care, and 55.1\% (70/127) stated seeing a specialist. Both patients and PHCPs recognized the utility of the screening and identified some key challenges to its wider implementation. Conclusions: The use of a screening app supported by training and supervision is feasible and uncovers a high prevalence of unidentified psychological symptoms in primary care. To increase its sustainability and utility, this procedure can be incorporated into the routine practices of existing health care services, following tailoring to the resources and features of each service. The early detection of psychological symptoms by a PHCP within a regular consultation, followed by adequate advice and support, can lead to a significant percentage of patients accessing specialized care and reducing the treatment gap of mental disorders. ", doi="10.2196/jmir.9208", url="http://www.jmir.org/2018/3/e100/", url="http://www.ncbi.nlm.nih.gov/pubmed/29588272" } @Article{info:doi/10.2196/jmir.9428, author="Brodey, Benjamin and Purcell, E. 
Susan and Rhea, Karen and Maier, Philip and First, Michael and Zweede, Lisa and Sinisterra, Manuela and Nunn, Brad M. and Austin, Marie-Paule and Brodey, S. Inger", title="Rapid and Accurate Behavioral Health Diagnostic Screening: Initial Validation Study of a Web-Based, Self-Report Tool (the SAGE-SR)", journal="J Med Internet Res", year="2018", month="Mar", day="23", volume="20", number="3", pages="e108", keywords="mental health", keywords="differential diagnosis", keywords="surveys and questionnaires", keywords="self-report", keywords="primary health care", keywords="computer-assisted diagnosis", abstract="Background: The Structured Clinical Interview for DSM (SCID) is considered the gold standard assessment for accurate, reliable psychiatric diagnoses; however, because of its length, complexity, and training required, the SCID is rarely used outside of research. Objective: This paper aims to describe the development and initial validation of a Web-based, self-report screening instrument (the Screening Assessment for Guiding Evaluation-Self-Report, SAGE-SR) based on the Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition (DSM-5) and the SCID-5-Clinician Version (CV) intended to make accurate, broad-based behavioral health diagnostic screening more accessible within clinical care. Methods: First, study staff drafted approximately 1200 self-report items representing individual granular symptoms in the diagnostic criteria for the 8 primary SCID-CV modules. An expert panel iteratively reviewed, critiqued, and revised items. The resulting items were iteratively administered and revised through 3 rounds of cognitive interviewing with community mental health center participants. In the first 2 rounds, the SCID was also administered to participants to directly compare their Likert self-report and SCID responses. 
A second expert panel evaluated the final pool of items from cognitive interviewing and criteria in the DSM-5 to construct the SAGE-SR, a computerized adaptive instrument that uses branching logic from a screener section to administer appropriate follow-up questions to refine the differential diagnoses. The SAGE-SR was administered to healthy controls and outpatient mental health clinic clients to assess test duration and test-retest reliability. Cutoff scores for screening into follow-up diagnostic sections and criteria for inclusion of diagnoses in the differential diagnosis were evaluated. Results: The expert panel reduced the initial 1200 test items to 664 items that panel members agreed collectively represented the SCID items from the 8 targeted modules and DSM criteria for the covered diagnoses. These 664 items were iteratively submitted to 3 rounds of cognitive interviewing with 50 community mental health center participants; the expert panel reviewed session summaries and agreed on a final set of 661 clear and concise self-report items representing the desired criteria in the DSM-5. The SAGE-SR constructed from this item pool took an average of 14 min to complete in a nonclinical sample versus 24 min in a clinical sample. Responses to individual items can be combined to generate DSM criteria endorsements and differential diagnoses, as well as provide indices of individual symptom severity. Preliminary measures of test-retest reliability in a small, nonclinical sample were promising, with good to excellent reliability for screener items in 11 of 13 diagnostic screening modules (intraclass correlation coefficient [ICC] or kappa coefficients ranging from .60 to .90), with mania achieving fair test-retest reliability (ICC=.50) and other substance use endorsed too infrequently for analysis. Conclusions: The SAGE-SR is a computerized adaptive self-report instrument designed to provide rigorous differential diagnostic information to clinicians. 
", doi="10.2196/jmir.9428", url="http://www.jmir.org/2018/3/e108/", url="http://www.ncbi.nlm.nih.gov/pubmed/29572204" } @Article{info:doi/10.2196/mhealth.9486, author="Kessel, A. Kerstin and Vogel, ME Marco and Alles, Anna and Dobiasch, Sophie and Fischer, Hanna and Combs, E. Stephanie", title="Mobile App Delivery of the EORTC QLQ-C30 Questionnaire to Assess Health-Related Quality of Life in Oncological Patients: Usability Study", journal="JMIR Mhealth Uhealth", year="2018", month="Feb", day="20", volume="6", number="2", pages="e45", keywords="radiation oncology", keywords="healthcare surveys", keywords="mobile applications", keywords="mobile apps", keywords="telemedicine", keywords="health-related quality of life", keywords="questionnaires", keywords="oncology", abstract="Background: Mobile apps are evolving in the medical field. However, ongoing discussions have questioned whether such apps are really valuable and whether patients will accept their use in day-to-day clinical life. Therefore, we initiated a usability study in our department. Objective: We present our results of the first app prototype and patient testing of health-related quality of life (HRQoL) assessment in oncological patients. Methods: We developed an app prototype for the iOS operating system within eight months in three phases: conception, initial development, and pilot testing. For the HRQoL assessment, we chose to implement only the European Organization for Research and Treatment of Cancer (EORTC) Quality of Life Questionnaire-Core 30 (QLQ-C30; German version 3). Usability testing was conducted for three months. Participation was voluntary and pseudonymized. After completion of the QLQ-C30 questionnaire using iPads provided by our department, we performed a short survey with 10 questions. 
This survey inquired about patients' opinions regarding general aspects, including technical advances in medicine, mobile and app assistance during cancer treatment, and the app-specific functions (eg, interface and navigation). Results: After logging into the app, the user can choose between starting a questionnaire, reviewing answers (administrators only), and logging out. The questionnaire is displayed with the same information, questions, and answers as on the original QLQ-C30 sheet. No alterations in wording were made. Usability was tested with 81 patients; median age was 55 years. The median time for completing the HRQoL questionnaire on the iPad was 4.0 minutes. Of all participants, 84\% (68/81) owned a mobile device. Similarly, 84\% (68/81) of participants would prefer a mobile version of the HRQoL questionnaire instead of a paper-based version. Using the app in daily life during and after cancer treatment would be supported by 83\% (67/81) of participants. In the prototype version of the app, data were stored on the device; in the future, 79\% (64/81) of the patients would agree to transfer data via the Internet. Conclusions: Our usability test showed good results regarding attractiveness, operability, and understandability. Moreover, our results demonstrate a high overall acceptance of mobile apps and telemedicine in oncology. The HRQoL assessment via the app was accepted thoroughly by patients, and individuals are keen to use it in clinical routines, while data privacy and security must be ensured. 
", doi="10.2196/mhealth.9486", url="http://mhealth.jmir.org/2018/2/e45/", url="http://www.ncbi.nlm.nih.gov/pubmed/29463489" } @Article{info:doi/10.2196/mental.8215, author="Ravoux, Hortense and Pereira, Bruno and Brousse, Georges and Dewavrin, Samuel and Cornet, Thomas and Mermillod, Martial and Mondillon, Laurie and Vallet, Guillaume and Moustafa, Far{\`e}s and Dutheil, Fr{\'e}d{\'e}ric", title="Work Addiction Test Questionnaire to Assess Workaholism: Validation of French Version", journal="JMIR Ment Health", year="2018", month="Feb", day="13", volume="5", number="1", pages="e12", keywords="behavior, addictive", keywords="work", keywords="validation studies as topic", keywords="questionnaires", keywords="social welfare", keywords="health", keywords="public health", abstract="Background: Work addiction is a significant public health problem with a growing prevalence. The Work Addiction Risk Test (WART) is the gold standard questionnaire to detect workaholism. Objective: The main objective of this study was to validate the French version of the WART. Methods: Questionnaires were proposed to voluntary French workers using the WittyFit software. There were no exclusion criteria. The questionnaire was administered anonymously for initial validity testing and readministered one week later for test-retest reliability. We also assessed the workers' sociodemographic characteristics, as well as other measurements for external validity, such as stress, well-being, and coaddictions to tobacco, alcohol, and cannabis. Several psychometric properties of the French-WART were explored: acceptability, reliability (internal consistency [Cronbach alpha coefficient] and reproducibility [Lin concordance coefficient]), construct validity (correlation coefficients and principal component analysis), and external validity (correlation coefficients). Results: Among the 1580 workers using WittyFit, 187 (11.83\%) agreed to complete the WART questionnaire. 
Of those, 128 completed the test-retest survey (68.4\%). Acceptability found that all respondents had fully completed the questionnaire, with few floor or ceiling effects. Reliability was very good with a Cronbach alpha coefficient at .90 (internal consistency) and Lin concordance coefficient at .90 (95\% CI .87-.94) with a difference on the retest of .04 (SD 4.9) (95\% CI $-$9.6 to 9.7) (reproducibility). We identified three main dimensions (construct validity). Relationships between WART and stress and well-being confirmed its external validity. Conclusions: The French version of the WART is a valid and reliable instrument to assess work addiction with satisfactory psychometric properties. Used in occupational medicine, this tool would allow the diagnosis of work addiction and can be easily implemented in current practice. ", doi="10.2196/mental.8215", url="http://mental.jmir.org/2018/1/e12/", url="http://www.ncbi.nlm.nih.gov/pubmed/29439945" } @Article{info:doi/10.2196/mental.7497, author="Axelsson, Erland and Linds{\"a}ter, Elin and Lj{\'o}tsson, Brj{\'a}nn and Andersson, Erik and Hedman-Lagerl{\"o}f, Erik", title="The 12-item Self-Report World Health Organization Disability Assessment Schedule (WHODAS) 2.0 Administered Via the Internet to Individuals With Anxiety and Stress Disorders: A Psychometric Investigation Based on Data From Two Clinical Trials", journal="JMIR Ment Health", year="2017", month="Dec", day="08", volume="4", number="4", pages="e58", keywords="disability", keywords="Internet", keywords="psychometrics", keywords="questionnaire", keywords="validity", keywords="WHODAS", abstract="Background: The World Health Organization Disability Assessment Schedule 2.0 (WHODAS 2.0) is a widespread measure of disability and functional impairment, which is bundled with the Diagnostic and Statistical Manual of Mental Disorders (Fifth Edition) for use in psychiatry. 
Administering psychometric scales via the Internet is an effective way to reach respondents and allow for convenient handling of data. Objective: The aim was to study the psychometric properties of the 12-item self-report WHODAS 2.0 when administered online to individuals with anxiety and stress disorders. The WHODAS 2.0 was hypothesized to exhibit high internal consistency and be unidimensional. We also expected the WHODAS 2.0 to show high 2-week test-retest reliability, convergent validity (correlations approximately .50 to .90 with other self-report measures of functional impairment), that it would differentiate between patients with and without exhaustion disorder, and that it would respond to change in primary symptom domain. Methods: We administered the 12-item self-report WHODAS 2.0 online to patients with anxiety and stress disorders (N=160) enrolled in clinical trials of cognitive behavior therapy, and analyzed psychometric properties within a classical test theory framework. Scores were compared with well-established symptom and disability measures, and sensitivity to change was studied from pretreatment to posttreatment assessment. Results: The 12-item self-report WHODAS 2.0 showed high internal consistency (Cronbach alpha=.83-.92), high 2-week test-retest reliability (intraclass correlation coefficient=.83), adequate construct validity, and was sensitive to change. We found preliminary evidence for a three-factorial structure, but one strong factor accounted for a clear majority of the variance. Conclusions: We conclude that the 12-item self-report WHODAS 2.0 is a psychometrically sound instrument when administered online to individuals with anxiety and stress disorders, but that it is probably fruitful to also report the three subfactors to facilitate comparisons between studies. 
Trial Registration: Clinicaltrials.gov NCT02540317; https://clinicaltrials.gov/ct2/show/NCT02540317 (Archived by WebCite at http://www.webcitation.org/6vQEdYAem); Clinicaltrials.gov NCT02314065; https://clinicaltrials.gov/ct2/show/NCT02314065 (Archived by WebCite at http://www.webcitation.org/6vQEjlUU8) ", doi="10.2196/mental.7497", url="http://mental.jmir.org/2017/4/e58/", url="http://www.ncbi.nlm.nih.gov/pubmed/29222080" } @Article{info:doi/10.2196/jmir.8473, author="Lumsden, Jim and Skinner, Andy and Coyle, David and Lawrence, Natalia and Munafo, Marcus", title="Attrition from Web-Based Cognitive Testing: A Repeated Measures Comparison of Gamification Techniques", journal="J Med Internet Res", year="2017", month="Nov", day="22", volume="19", number="11", pages="e395", keywords="behavioral research/methods", keywords="games, experimental", keywords="computers", keywords="cognition", keywords="Internet", keywords="play and playthings/psychology", keywords="boredom", keywords="task performance and analysis", keywords="executive function", keywords="inhibition (psychology)", abstract="Background: The prospect of assessing cognition longitudinally and remotely is attractive to researchers, health practitioners, and pharmaceutical companies alike. However, such repeated testing regimes place a considerable burden on participants, and with cognitive tasks typically being regarded as effortful and unengaging, these studies may experience high levels of participant attrition. One potential solution is to gamify these tasks to make them more engaging: increasing participant willingness to take part and reducing attrition. However, such an approach must balance task validity with the introduction of entertaining gamelike elements. Objective: This study aims to investigate the effects of gamelike features on participant attrition using a between-subjects, longitudinal Web-based testing study. 
Methods: We used three variants of a common cognitive task, the Stop Signal Task (SST), with a single gamelike feature in each: one variant where points were rewarded for performing optimally; another where the task was given a graphical theme; and a third variant, which was a standard SST and served as a control condition. Participants completed four compulsory test sessions over 4 consecutive days before entering a 6-day voluntary testing period where they faced a daily decision to either drop out or continue taking part. Participants were paid for each session they completed. Results: A total of 482 participants signed up to take part in the study, with 265 completing the requisite four consecutive test sessions. No evidence of an effect of gamification on attrition was observed. A log-rank test showed no evidence of a difference in dropout rates between task variants ($\chi^2_2$=3.0, P=.22), and a one-way analysis of variance of the mean number of sessions completed per participant in each variant also showed no evidence of a difference ($F_{2,262}$=1.534, P=.21, partial $\eta^2$=0.012). Conclusions: Our findings raise doubts about the ability of gamification to reduce attrition from longitudinal cognitive testing studies. 
", doi="10.2196/jmir.8473", url="http://www.jmir.org/2017/11/e395/", url="http://www.ncbi.nlm.nih.gov/pubmed/29167090" } @Article{info:doi/10.2196/mental.6888, author="Kingston, Dawn and Biringer, Anne and Veldhuyzen van Zanten, Sander and Giallo, Rebecca and McDonald, Sarah and MacQueen, Glenda and Vermeyden, Lydia and Austin, Marie-Paule", title="Pregnant Women's Perceptions of the Risks and Benefits of Disclosure During Web-Based Mental Health E-Screening Versus Paper-Based Screening: Randomized Controlled Trial", journal="JMIR Ment Health", year="2017", month="Oct", day="20", volume="4", number="4", pages="e42", keywords="pregnancy", keywords="mental health", keywords="screening", keywords="prenatal care", keywords="computers", abstract="Background: Pregnant women's perceptions of the risks and benefits during mental health screening impact their willingness to disclose concerns. Early research in violence screening suggests that such perceptions may vary by mode of screening, whereby women view the anonymity of e-screening as less risky than other approaches. Understanding whether mode of screening influences perceptions of risk and benefit of disclosure is important in screening implementation. Objective: The objective of this randomized controlled trial was to compare the perceptions of pregnant women randomized to a Web-based screening intervention group and a paper-based screening control group on the level of risk and benefit they perceive in disclosing mental health concerns to their prenatal care provider. A secondary objective was to identify factors associated with women's perceptions of risk and benefit of disclosure. Methods: Pregnant women recruited from maternity clinics, hospitals, and prenatal classes were computer-randomized to a fully automated Web-based e-screening intervention group or a paper-based control. 
The intervention group completed the Antenatal Psychosocial Health Assessment and the Edinburgh Postnatal Depression Scale on a computer tablet, whereas the control group completed them on paper. The primary outcome was women's perceptions of the risk and benefits of mental health screening using the Disclosure Expectations Scale (DES). A completer analysis was conducted. Statistical significance was set at P<.05. We used t tests to compare the means of the risk and benefit subscales between groups. Results: Of the 675 eligible women approached, 636 (94.2\%) agreed to participate and were randomized to the intervention (n=305) and control (n=331) groups. There were no significant baseline differences between groups. The mode of screening was not associated with either perceived risk or benefit of screening. There were no differences in groups in the mean scores of the risk and benefit of disclosure subscales. Over three-quarters of women in both intervention and control groups perceived that mental health screening was beneficial. However, 43.1\% (272/631) of women in both groups reported feeling very, moderately, or somewhat vulnerable during mental health screening. We found that women of low income, those treated previously for depression or anxiety, and those pregnant with their first child were more likely to perceive greater risk. However, these associations were very small. Conclusions: Pregnant women in both the e-screening and paper-based screening groups perceived benefit and risk of disclosure similarly, suggesting that providers can implement the mode of screening that is most ideal for their clinical setting. 
Regardless of the mode of screening, a substantial number of women reported feeling vulnerable during mental health screening, highlighting the importance of the need to reduce women's vulnerability throughout the screening process with strategies such as addressing women's concerns, explaining the rationale for screening, and discussing how results will be used. Trial Registration: Clinicaltrials.gov NCT01899534; https://clinicaltrials.gov/ct2/show/NCT01899534 (Archived by WebCite at http://www.webcitation.org/6tRKtGC4M) ", doi="10.2196/mental.6888", url="http://mental.jmir.org/2017/4/e42/", url="http://www.ncbi.nlm.nih.gov/pubmed/29054833" } @Article{info:doi/10.2196/jmir.7671, author="Austin, Johanna and Hollingshead, Kristy and Kaye, Jeffrey", title="Internet Searches and Their Relationship to Cognitive Function in Older Adults: Cross-Sectional Analysis", journal="J Med Internet Res", year="2017", month="Sep", day="06", volume="19", number="9", pages="e307", keywords="Internet", keywords="geriatrics", keywords="cognition", keywords="executive function", abstract="Background: Alzheimer disease (AD) is a very challenging experience for all those affected. Unfortunately, detection of Alzheimer disease in its early stages when clinical treatments may be most effective is challenging, as the clinical evaluations are time-consuming and costly. Recent studies have demonstrated a close relationship between cognitive function and everyday behavior, an avenue of research that holds great promise for the early detection of cognitive decline. One area of behavior that changes with cognitive decline is language use. Multiple groups have demonstrated a close relationship between cognitive function and vocabulary size, verbal fluency, and semantic ability, using conventional in-person cognitive testing. 
An alternative to this approach which is inherently ecologically valid may be to take advantage of automated computer monitoring software to continually capture and analyze language use while on the computer. Objective: The aim of this study was to understand the relationship between Internet searches as a measure of language and cognitive function in older adults. We hypothesize that individuals with poorer cognitive function will search using fewer unique terms, employ shorter words, and use less obscure words in their searches. Methods: Computer monitoring software (WorkTime, Nestersoft Inc) was used to continuously track the terms people entered while conducting searches in Google, Yahoo, Bing, and Ask.com. For all searches, punctuation, accents, and non-ASCII characters were removed, and the resulting search terms were spell-checked before any analysis. Cognitive function was evaluated as a z-normalized summary score capturing five unique cognitive domains. Linear regression was used to determine the relationship between cognitive function and Internet searches by controlling for variables such as age, sex, and education. Results: Over a 6-month monitoring period, 42 participants (mean age 81 years [SD 10.5], 83\% [35/42] female) conducted 2915 searches using these top search engines. Participants averaged 3.08 words per search (SD 1.6) and 5.77 letters per word (SD 2.2). Individuals with higher cognitive function used more unique terms per search (beta=.39, P=.002) and employed less common terms in their searches (beta=1.39, P=.02). Cognitive function was not significantly associated with the length of the words used in the searches. Conclusions: These results suggest that early decline in cognitive function may be detected from the terms people search for when they use the Internet. 
By continuously tracking basic aspects of Internet search terms, it may be possible to detect cognitive decline earlier than currently possible, thereby enabling proactive treatment and intervention. ", doi="10.2196/jmir.7671", url="http://www.jmir.org/2017/9/e307/", url="http://www.ncbi.nlm.nih.gov/pubmed/28877864" } @Article{info:doi/10.2196/mental.6805, author="Brodey, B. Benjamin and Gonzalez, L. Nicole and Elkin, Ann Kathryn and Sasiela, Jordan W. and Brodey, S. Inger", title="Assessing the Equivalence of Paper, Mobile Phone, and Tablet Survey Responses at a Community Mental Health Center Using Equivalent Halves of a `Gold-Standard' Depression Item Bank", journal="JMIR Ment Health", year="2017", month="Sep", day="06", volume="4", number="3", pages="e36", keywords="mobile phone", keywords="tablet", keywords="PROMIS", keywords="depression", keywords="item response theory", keywords="outcomes tracking", keywords="PORTAL", keywords="TeleSage", keywords="behavioral health", keywords="special issue on computing and mental health", abstract="Background: The computerized administration of self-report psychiatric diagnostic and outcomes assessments has risen in popularity. If results are similar enough across different administration modalities, then new administration technologies can be used interchangeably and the choice of technology can be based on other factors, such as convenience in the study design. An assessment based on item response theory (IRT), such as the Patient-Reported Outcomes Measurement Information System (PROMIS) depression item bank, offers new possibilities for assessing the effect of technology choice upon results. Objective: To create equivalent halves of the PROMIS depression item bank and to use these halves to compare survey responses and user satisfaction among administration modalities---paper, mobile phone, or tablet---with a community mental health care population. 
Methods: The 28 PROMIS depression items were divided into 2 halves based on content and simulations with an established PROMIS response data set. A total of 129 participants were recruited from an outpatient public sector mental health clinic based in Memphis. All participants took both nonoverlapping halves of the PROMIS IRT-based depression items (Part A and Part B): once using paper and pencil, and once using either a mobile phone or tablet. An 8-cell randomization was done on technology used, order of technologies used, and order of PROMIS Parts A and B. Both Parts A and B were administered as fixed-length assessments and both were scored using published PROMIS IRT parameters and algorithms. Results: All 129 participants received either Part A or B via paper assessment. Participants were also administered the opposite assessment, 63 using a mobile phone and 66 using a tablet. There was no significant difference in item response scores for Part A versus B. All 3 of the technologies yielded essentially identical assessment results and equivalent satisfaction levels. Conclusions: Our findings show that the PROMIS depression assessment can be divided into 2 equivalent halves, with the potential to simplify future experimental methodologies. Among community mental health care recipients, the PROMIS items function similarly whether administered via paper, tablet, or mobile phone. User satisfaction across modalities was also similar. Because paper, tablet, and mobile phone administrations yielded similar results, the choice of technology should be based on factors such as convenience and can even be changed during a study without adversely affecting the comparability of results. ", doi="10.2196/mental.6805", url="http://mental.jmir.org/2017/3/e36/", url="http://www.ncbi.nlm.nih.gov/pubmed/28877861" } @Article{info:doi/10.2196/mental.5453, author="Meuldijk, Denise and Giltay, J. Erik and Carlier, VE Ingrid and van Vliet, M. Irene and van Hemert, M. 
Albert and Zitman, G. Frans", title="A Validation Study of the Web Screening Questionnaire (WSQ) Compared With the Mini-International Neuropsychiatric Interview-Plus (MINI-Plus)", journal="JMIR Ment Health", year="2017", month="Aug", day="29", volume="4", number="3", pages="e35", keywords="depressive disorders", keywords="anxiety disorders", keywords="surveys and questionnaires", keywords="diagnostic, brief", keywords="clinical practice", abstract="Background: There is a need for brief screening methods for psychiatric disorders in clinical practice. This study assesses the validity and accuracy of a brief self-report screening questionnaire, the Web Screening Questionnaire (WSQ), in detecting psychiatric disorders in a study group comprising the general population and psychiatric outpatients aged 18 years and older. Objective: The aim of this study was to investigate whether the WSQ is an adequate test to screen for the presence of depressive and anxiety disorders in clinical practice. Methods: Participants were 1292 adults (1117 subjects from the general population and 175 psychiatric outpatients), aged 18 to 65 years. The discriminant characteristics of the WSQ were examined in relation to the (``gold standard'') Mini-International Neuropsychiatric Interview-Plus (MINI-Plus) disorders, by means of sensitivity, specificity, area under the curve (AUC), and positive and negative predictive values (PPVs, NPVs). Results: The specificity of the WSQ to individually detect depressive disorders, anxiety disorders, and alcohol abuse or dependence ranged from 0.89 to 0.97 for most disorders, with the exception of post-traumatic stress disorder (0.52) and specific phobia (0.73). The sensitivity values ranged from 0.67 to 1.00, with the exception of depressive disorder (0.56) and alcohol abuse or dependence (0.56). 
Given the low prevalence of separate disorders in the general population sample, NPVs were extremely high across disorders ($\geq$0.97), whereas PPVs were of poor strength (range 0.02-0.33). Conclusions: In this study group, the WSQ was a relatively good screening tool to identify individuals without a depressive or anxiety disorder, as it accurately identified those unlikely to suffer from these disorders (except for post-traumatic stress disorders and specific phobias). However, in case of a positive WSQ screening result, further diagnostic procedures are required. ", doi="10.2196/mental.5453", url="http://mental.jmir.org/2017/3/e35/", url="http://www.ncbi.nlm.nih.gov/pubmed/28851674" } @Article{info:doi/10.2196/mhealth.6333, author="Price, Edward and Moore, George and Galway, Leo and Linden, Mark", title="Validation of a Smartphone-Based Approach to In Situ Cognitive Fatigue Assessment", journal="JMIR Mhealth Uhealth", year="2017", month="Aug", day="17", volume="5", number="8", pages="e125", keywords="mental fatigue", keywords="fatigue", keywords="acquired brain injury", keywords="cognitive tests", keywords="assistive technology", keywords="smartphone", abstract="Background: Acquired Brain Injuries (ABIs) can result in multiple detrimental cognitive effects, such as reduced memory capability, concentration, and planning. These effects can lead to cognitive fatigue, which can exacerbate the symptoms of ABIs and hinder management and recovery. Assessing cognitive fatigue is difficult due to the largely subjective nature of the condition and existing assessment approaches. Traditional methods of assessment use self-assessment questionnaires delivered in a medical setting, but recent work has attempted to employ more objective cognitive tests as a way of evaluating cognitive fatigue. However, these tests are still predominantly delivered within a medical environment, limiting their utility and efficacy. 
Objective: The aim of this research was to investigate how cognitive fatigue can be accurately assessed in situ, during the quotidian activities of life. It was hypothesized that this assessment could be achieved through the use of mobile assistive technology to assess working memory, sustained attention, information processing speed, reaction time, and cognitive throughput. Methods: The study used a bespoke smartphone app to track daily cognitive performance, in order to assess potential levels of cognitive fatigue. Twenty-one participants with no prior reported brain injuries took place in a two-week study, resulting in 81 individual testing instances being collected. The smartphone app delivered three cognitive tests on a daily basis: (1) Spatial Span to measure visuospatial working memory; (2) Psychomotor Vigilance Task (PVT) to measure sustained attention, information processing speed, and reaction time; and (3) a Mental Arithmetic Test to measure cognitive throughput. A smartphone-optimized version of the Mental Fatigue Scale (MFS) self-assessment questionnaire was used as a baseline to assess the validity of the three cognitive tests, as the questionnaire has already been validated in multiple peer-reviewed studies. Results: The most highly correlated results were from the PVT, which showed a positive correlation with those from the prevalidated MFS, measuring 0.342 (P<.008). Scores from the cognitive tests were entered into a regression model and showed that only reaction time in the PVT was a significant predictor of fatigue (P=.016, F=2.682, 95\% CI 9.0-84.2). Higher scores on the MFS were related to increases in reaction time during our mobile variant of the PVT. Conclusions: The results show that the PVT mobile cognitive test developed for this study could be used as a valid and reliable method for measuring cognitive fatigue in situ. 
This test would remove the subjectivity associated with established self-assessment approaches and the need for assessments to be performed in a medical setting. Based on our findings, future work could explore delivering a small set of tests with increased duration to further improve measurement reliability. Moreover, as the smartphone assessment tool can be used as part of everyday life, additional sources of data relating to physiological, psychological, and environmental context could be included within the analysis to improve the nature and precision of the assessment process. ", doi="10.2196/mhealth.6333", url="http://mhealth.jmir.org/2017/8/e125/", url="http://www.ncbi.nlm.nih.gov/pubmed/28818818" } @Article{info:doi/10.2196/medinform.6808, author="Kaiser, Tim and Laireiter, Rupert Anton", title="DynAMo: A Modular Platform for Monitoring Process, Outcome, and Algorithm-Based Treatment Planning in Psychotherapy", journal="JMIR Med Inform", year="2017", month="Jul", day="20", volume="5", number="3", pages="e20", keywords="health information management", keywords="mental health", keywords="mental disorders", keywords="psychotherapeutic processes", keywords="algorithms", abstract="Background: In recent years, the assessment of mental disorders has become more and more personalized. Modern advancements such as Internet-enabled mobile phones and increased computing capacity make it possible to tap sources of information that have long been unavailable to mental health practitioners. Objective: Software packages that combine algorithm-based treatment planning, process monitoring, and outcome monitoring are scarce. The objective of this study was to assess whether the DynAMo Web application can fill this gap by providing a software solution that can be used by both researchers to conduct state-of-the-art psychotherapy process research and clinicians to plan treatments and monitor psychotherapeutic processes. 
Methods: In this paper, we report on the current state of a Web application that can be used for assessing the temporal structure of mental disorders using information on their temporal and synchronous associations. A treatment planning algorithm automatically interprets the data and delivers priority scores of symptoms to practitioners. The application is also capable of monitoring psychotherapeutic processes during therapy and of monitoring treatment outcomes. This application was developed using the R programming language (R Core Team, Vienna) and the Shiny Web application framework (RStudio, Inc, Boston). It is made entirely from open-source software packages and thus is easily extensible. Results: The capabilities of the proposed application are demonstrated. Case illustrations are provided to exemplify its usefulness in clinical practice. Conclusions: With the broad availability of Internet-enabled mobile phones and similar devices, collecting data on psychopathology and psychotherapeutic processes has become easier than ever. The proposed application is a valuable tool for capturing, processing, and visualizing these data. The combination of dynamic assessment and process- and outcome monitoring has the potential to improve the efficacy and effectiveness of psychotherapy. ", doi="10.2196/medinform.6808", url="http://medinform.jmir.org/2017/3/e20/", url="http://www.ncbi.nlm.nih.gov/pubmed/28729233" } @Article{info:doi/10.2196/resprot.7447, author="Penders, M. Thomas and Wuensch, L. Karl and Ninan, T. Philip", title="eMindLog: Self-Measurement of Anxiety and Depression Using Mobile Technology", journal="JMIR Res Protoc", year="2017", month="May", day="24", volume="6", number="5", pages="e98", keywords="mobile", keywords="anxiety", keywords="depression", keywords="internet", keywords="measurement", abstract="Background: Quantifying anxiety and depressive experiences permits individuals to calibrate where they are and monitor intervention-associated changes. 
eMindLog is a novel self-report measure for anxiety and depression that is grounded in psychology with an organizing structure based on neuroscience. Objective: Our aim was to explore the psychometric properties of eMindLog in a nonclinical sample of subjects. Methods: In a cross-sectional study of eMindLog, a convenience sample of 198 adults provided informed consent and completed eMindLog and the Hospital Anxiety and Depression Scale (HADS) as a reference. Brain systems (eg, negative and positive valence systems, cognitive systems) and their functional states that drive behavior are measured daily as emotions, thoughts, and behaviors. Associated symptoms, quality of life, and functioning are assessed weekly. eMindLog offers ease of use and expediency, using mobile technology across multiple platforms, with dashboard reporting of scores. It enhances precision by providing distinct, nonoverlapping description of terms, and accuracy through guidance for scoring severity. Results: eMindLog daily total score had a Cronbach alpha of .94. Pearson correlation coefficient for eMindLog indexes for anxiety and sadness/anhedonia were r=.66 (P<.001) and r=.62 (P<.001) contrasted with the HADS anxiety and depression subscales respectively. Of 195 subjects, 23 (11.8\%) had cross-sectional symptoms above the threshold for Generalized Anxiety Disorder and 29 (29/195, 14.9\%) for Major Depressive Disorder. Factor analysis supported the theoretically derived index derivatives for anxiety, anger, sadness, and anhedonia. Conclusions: eMindLog is a novel self-measurement tool to measure anxiety and depression, demonstrating excellent reliability and strong validity in a nonclinical population. Further studies in clinical populations are necessary for fuller validation of its psychometric properties. 
Self-measurement of anxiety and depressive symptoms with precision and accuracy has several potential benefits, including case detection, tracking change over time, efficacy assessment of interventions, and exploration of potential biomarkers. ", doi="10.2196/resprot.7447", url="http://www.researchprotocols.org/2017/5/e98/", url="http://www.ncbi.nlm.nih.gov/pubmed/28539304" } @Article{info:doi/10.2196/jmir.5439, author="Ben-Sasson, Ayelet and Yom-Tov, Elad", title="Online Concerns of Parents Suspecting Autism Spectrum Disorder in Their Child: Content Analysis of Signs and Automated Prediction of Risk", journal="J Med Internet Res", year="2016", month="Nov", day="22", volume="18", number="11", pages="e300", keywords="online queries", keywords="autistic disorders", keywords="parents", keywords="machine learning", keywords="early detection", abstract="Background: Online communities are used as platforms by parents to verify developmental and health concerns related to their child. The increasing public awareness of autism spectrum disorders (ASD) leads more parents to suspect ASD in their child. Early identification of ASD is important for early intervention. Objective: To characterize the symptoms mentioned in online queries posed by parents who suspect that their child might have ASD and determine whether they are age-specific. To test the efficacy of machine learning tools in classifying the child's risk of ASD based on the parent's narrative. Methods: To this end, we analyzed online queries posed by parents who were concerned that their child might have ASD and categorized the warning signs they mentioned according to ASD-specific and non-ASD--specific domains. We then used the data to test the efficacy with which a trained machine learning tool classified the degree of ASD risk. Yahoo Answers, a social site for posting queries and finding answers, was mined for queries of parents asking the community whether their child has ASD. 
A total of 195 queries were sampled for this study (mean child age=38.0 months; 84.7\% [160/189] boys). Content text analysis of the queries aimed to categorize the types of symptoms described and obtain clinical judgment of the child's ASD-risk level. Results: Concerns related to repetitive and restricted behaviors and interests (RRBI) were the most prevalent (75.4\%, 147/195), followed by concerns related to language (61.5\%, 120/195) and emotional markers (50.3\%, 98/195). Of the 195 queries, 18.5\% (36/195) were rated by clinical experts as low-risk, 30.8\% (60/195) as medium-risk, and 50.8\% (99/195) as high-risk. Risk groups differed significantly (P<.001) in the rate of concerns in the language, social, communication, and RRBI domains. When testing whether an automatic classifier (decision tree) could predict if a query was medium- or high-risk based on the text of the query and the coded symptoms, performance reached an area under the receiver operating curve (ROC) curve of 0.67 (CI 95\% 0.50-0.78), whereas predicting from the text and the coded signs resulted in an area under the curve of 0.82 (0.80-0.86). Conclusions: Findings call for health care providers to closely listen to parental ASD-related concerns, as recommended by screening guidelines. They also demonstrate the need for Internet-based screening systems that utilize parents' narratives using a decision tree questioning method. ", doi="10.2196/jmir.5439", url="http://www.jmir.org/2016/11/e300/", url="http://www.ncbi.nlm.nih.gov/pubmed/27876688" } @Article{info:doi/10.2196/games.5888, author="Lumsden, Jim and Edwards, A. Elizabeth and Lawrence, S. Natalia and Coyle, David and Munaf{\`o}, R. 
Marcus", title="Gamification of Cognitive Assessment and Cognitive Training: A Systematic Review of Applications and Efficacy", journal="JMIR Serious Games", year="2016", month="Jul", day="15", volume="4", number="2", pages="e11", keywords="gamification", keywords="gamelike", keywords="cognition", keywords="computer games", keywords="review", abstract="Background: Cognitive tasks are typically viewed as effortful, frustrating, and repetitive, which often leads to participant disengagement. This, in turn, may negatively impact data quality and/or reduce intervention effects. However, gamification may provide a possible solution. If game design features can be incorporated into cognitive tasks without undermining their scientific value, then data quality, intervention effects, and participant engagement may be improved. Objectives: This systematic review aims to explore and evaluate the ways in which gamification has already been used for cognitive training and assessment purposes. We hope to answer 3 questions: (1) Why have researchers opted to use gamification? (2) What domains has gamification been applied in? (3) How successful has gamification been in cognitive research thus far? Methods: We systematically searched several Web-based databases, searching the titles, abstracts, and keywords of database entries using the search strategy (gamif* OR game OR games) AND (cognit* OR engag* OR behavi* OR health* OR attention OR motiv*). Searches included papers published in English between January 2007 and October 2015. Results: Our review identified 33 relevant studies, covering 31 gamified cognitive tasks used across a range of disorders and cognitive domains. We identified 7 reasons for researchers opting to gamify their cognitive training and testing. We found that working memory and general executive functions were common targets for both gamified assessment and training. 
Gamified tests were typically validated successfully, although mixed-domain measurement was a problem. Gamified training appears to be highly engaging and does boost participant motivation, but mixed effects of gamification on task performance were reported. Conclusions: Heterogeneous study designs and typically small sample sizes highlight the need for further research in both gamified training and testing. Nevertheless, careful application of gamification can provide a way to develop engaging and yet scientifically valid cognitive assessments, and it is likely worthwhile to continue to develop gamified cognitive tasks in the future. ", doi="10.2196/games.5888", url="http://games.jmir.org/2016/2/e11/", url="http://www.ncbi.nlm.nih.gov/pubmed/27421244" } @Article{info:doi/10.2196/jmir.5726, author="BinDhim, F. Nasser and Alanazi, M. Eman and Aljadhey, Hisham and Basyouni, H. Mada and Kowalski, R. Stefan and Pont, G. Lisa and Shaman, M. Ahmed and Trevena, Lyndal and Alhawassi, M. Tariq", title="Does a Mobile Phone Depression-Screening App Motivate Mobile Phone Users With High Depressive Symptoms to Seek a Health Care Professional's Help?", journal="J Med Internet Res", year="2016", month="Jun", day="27", volume="18", number="6", pages="e156", keywords="mental health", keywords="depression", keywords="mobile phone", keywords="public health informatics", keywords="patients' screening", abstract="Background: The objective of disease screening is to encourage high-risk subjects to seek health care diagnosis and treatment. Mobile phone apps can effectively screen mental health conditions, including depression. However, it is not known how effective such screening methods are in motivating users to discuss the obtained results of such apps with health care professionals. Does a mobile phone depression-screening app motivate users with high depressive symptoms to seek health care professional advice? This study aimed to address this question. 
Method: This was a single-cohort, prospective, observational study of a free mobile phone depression app developed in English and released on Apple's App Store. Apple App Store users (aged 18 or above) in 5 countries, that is, Australia, Canada, New Zealand (NZ), the United Kingdom (UK), and the United States (US), were recruited directly via the app's download page. The participants then completed the Patient Health Questionnaire (PHQ-9), and their depression screening score was displayed to them. If their score was 11 or above and they had never been diagnosed with depression before, they were advised to take their results to their health care professional. They were to follow up after 1 month. Results: A group of 2538 participants from the 5 countries completed PHQ-9 depression screening with the app. Of them, 322 participants were found to have high depressive symptoms and had never been diagnosed with depression, and received advice to discuss their results with health care professionals. About 74\% of those completed the follow-up; approximately 38\% of these self-reported consulting their health care professionals about their depression score. Only positive attitude toward depression as a real disease was associated with increased follow-up response rate (odds ratio (OR) 3.2, CI 1.38-8.29). Conclusions: A mobile phone depression-screening app motivated some users to seek a depression diagnosis. However, further study should investigate how other app users use the screening results provided by such apps. 
", doi="10.2196/jmir.5726", url="http://www.jmir.org/2016/6/e156/", url="http://www.ncbi.nlm.nih.gov/pubmed/27349441" } @Article{info:doi/10.2196/rehab.4155, author="Gamito, Pedro and Morais, Diogo and Oliveira, Jorge and Ferreira Lopes, Paulo and Picareli, Felipe Lu{\'i}s and Matias, Marcelo and Correia, Sara and Brito, Rodrigo", title="Systemic Lisbon Battery: Normative Data for Memory and Attention Assessments", journal="JMIR Rehabil Assist Technol", year="2016", month="May", day="04", volume="3", number="1", pages="e5", keywords="Systemic Lisbon Battery", keywords="attention", keywords="memory", keywords="cognitive assessment", keywords="virtual reality", abstract="Background: Memory and attention are two cognitive domains pivotal for the performance of instrumental activities of daily living (IADLs). The assessment of these functions is still widely carried out with pencil-and-paper tests, which lack ecological validity. The evaluation of cognitive and memory functions while the patients are performing IADLs should contribute to the ecological validity of the evaluation process. Objective: The objective of this study is to establish normative data from virtual reality (VR) IADLs designed to activate memory and attention functions. Methods: A total of 243 non-clinical participants carried out a paper-and-pencil Mini-Mental State Examination (MMSE) and performed 3 VR activities: art gallery visual matching task, supermarket shopping task, and memory fruit matching game. The data (execution time and errors, and money spent in the case of the supermarket activity) was automatically generated from the app. Results: Outcomes were computed using non-parametric statistics, due to non-normality of distributions. Age, academic qualifications, and computer experience all had significant effects on most measures. Normative values for different levels of these measures were defined. 
Conclusions: Age, academic qualifications, and computer experience should be taken into account while using our VR-based platform for cognitive assessment purposes. ", doi="10.2196/rehab.4155", url="http://rehab.jmir.org/2016/1/e5/", url="http://www.ncbi.nlm.nih.gov/pubmed/28582246" } @Article{info:doi/10.2196/jmir.4195, author="Nguyen, Phong David and Klein, Britt and Meyer, Denny and Austin, William David and Abbott, M. Jo-Anne", title="The Diagnostic Validity and Reliability of an Internet-Based Clinical Assessment Program for Mental Disorders", journal="J Med Internet Res", year="2015", month="Sep", day="21", volume="17", number="9", pages="e218", keywords="Internet", keywords="online", keywords="mental health", keywords="validity", keywords="reliability", keywords="assessment", keywords="diagnosis", keywords="screening", keywords="anxiety", keywords="depression", abstract="Background: Internet-based assessment has the potential to assist with the diagnosis of mental health disorders and overcome the barriers associated with traditional services (eg, cost, stigma, distance). Further to existing online screening programs available, there is an opportunity to deliver more comprehensive and accurate diagnostic tools to supplement the assessment and treatment of mental health disorders. Objective: The aim was to evaluate the diagnostic criterion validity and test-retest reliability of the electronic Psychological Assessment System (e-PASS), an online, self-report, multidisorder, clinical assessment and referral system. Methods: Participants were 616 adults residing in Australia, recruited online, and representing prospective e-PASS users. Following e-PASS completion, 158 participants underwent a telephone-administered structured clinical interview and 39 participants repeated the e-PASS within 25 days of initial completion. 
Results: With structured clinical interview results serving as the gold standard, diagnostic agreement with the e-PASS varied considerably from fair (eg, generalized anxiety disorder: $\kappa$=.37) to strong (eg, panic disorder: $\kappa$=.62). Although the e-PASS' sensitivity also varied (0.43-0.86) the specificity was generally high (0.68-1.00). The e-PASS sensitivity generally improved when reducing the e-PASS threshold to a subclinical result. Test-retest reliability ranged from moderate (eg, specific phobia: $\kappa$=.54) to substantial (eg, bulimia nervosa: $\kappa$=.87). Conclusions: The e-PASS produces reliable diagnostic results and performs generally well in excluding mental disorders, although at the expense of sensitivity. For screening purposes, the e-PASS subclinical result generally appears better than a clinical result as a diagnostic indicator. Further development and evaluation is needed to support the use of online diagnostic assessment programs for mental disorders. Trial Registration: Australian and New Zealand Clinical Trials Registry ACTRN121611000704998; http://www.anzctr.org.au/trial\_view.aspx?ID=336143 (Archived by WebCite at http://www.webcitation.org/618r3wvOG). 
", doi="10.2196/jmir.4195", url="http://www.jmir.org/2015/9/e218/", url="http://www.ncbi.nlm.nih.gov/pubmed/26392066" } @Article{info:doi/10.2196/mental.3805, author="Khazaal, Yasser and Achab, Sophia and Billieux, Joel and Thorens, Gabriel and Zullino, Daniele and Dufour, Magali and Rothen, St{\'e}phane", title="Factor Structure of the Internet Addiction Test in Online Gamers and Poker Players", journal="JMIR Mental Health", year="2015", month="Apr", day="22", volume="2", number="2", pages="e12", keywords="Internet addiction", keywords="Internet Addiction Test (IAT)", keywords="poker players", keywords="World of Warcraft", keywords="massively multiplayer online role playing", keywords="validation", keywords="factorial structure", abstract="Background: The Internet Addiction Test (IAT) is the most widely used questionnaire to screen for problematic Internet use. Nevertheless, its factorial structure is still debated, which complicates comparisons among existing studies. Most previous studies were performed with students or community samples despite the probability of there being more problematic Internet use among users of specific applications, such as online gaming or gambling. Objective: To assess the factorial structure of a modified version of the IAT that addresses specific applications, such as video games and online poker. Methods: Two adult samples---one sample of Internet gamers (n=920) and one sample of online poker players (n=214)---were recruited and completed an online version of the modified IAT. Both samples were split into two subsamples. Two principal component analyses (PCAs) followed by two confirmatory factor analyses (CFAs) were run separately. Results: The results of principal component analysis indicated that a one-factor model fit the data well across both samples. In consideration of the weakness of some IAT items, a 17-item modified version of the IAT was proposed. 
Conclusions: This study assessed, for the first time, the factorial structure of a modified version of an Internet-administered IAT on a sample of Internet gamers and a sample of online poker players. The scale seems appropriate for the assessment of such online behaviors. Further studies on the modified 17-item IAT version are needed. ", doi="10.2196/mental.3805", url="http://mental.jmir.org/2015/2/e12/", url="http://www.ncbi.nlm.nih.gov/pubmed/26543917" } @Article{info:doi/10.2196/mental.3889, author="Torous, John and Staples, Patrick and Shanahan, Meghan and Lin, Charlie and Peck, Pamela and Keshavan, Matcheri and Onnela, Jukka-Pekka", title="Utilizing a Personal Smartphone Custom App to Assess the Patient Health Questionnaire-9 (PHQ-9) Depressive Symptoms in Patients With Major Depressive Disorder", journal="JMIR Mental Health", year="2015", month="Mar", day="24", volume="2", number="1", pages="e8", keywords="medical informatics", keywords="mobile health", keywords="depression", abstract="Background: Accurate reporting of patient symptoms is critical for diagnosis and therapeutic monitoring in psychiatry. Smartphones offer an accessible, low-cost means to collect patient symptoms in real time and aid in care. Objective: To investigate adherence among psychiatric outpatients diagnosed with major depressive disorder in utilizing their personal smartphones to run a custom app to monitor Patient Health Questionnaire-9 (PHQ-9) depression symptoms, as well as to examine the correlation of these scores to traditionally administered (paper-and-pencil) PHQ-9 scores. Methods: A total of 13 patients with major depressive disorder, referred by their clinicians, received standard outpatient treatment and, in addition, utilized their personal smartphones to run the study app to monitor their symptoms. 
Subjects downloaded and used the Mindful Moods app on their personal smartphone to complete up to three survey sessions per day, during which a randomized subset of PHQ-9 symptoms of major depressive disorder were assessed on a Likert scale. The study lasted 29 or 30 days without additional follow-up. Outcome measures included adherence, measured by the percentage of completed survey sessions, and estimates of daily PHQ-9 scores collected from the smartphone app, as well as from the traditionally administered PHQ-9. Results: Overall adherence was 77.78\% (903/1161) and varied with time of day. PHQ-9 estimates collected from the app strongly correlated (r=.84) with traditionally administered PHQ-9 scores, but app-collected scores were 3.02 (SD 2.25) points higher on average. More subjects reported suicidal ideation using the app than they did on the traditionally administered PHQ-9. Conclusions: Patients with major depressive disorder are able to utilize an app on their personal smartphones to self-assess their symptoms of major depressive disorder with high levels of adherence. These app-collected results correlate with the traditionally administered PHQ-9. Scores recorded from the app may potentially be more sensitive and better able to capture suicidality than the traditional PHQ-9. 
", doi="10.2196/mental.3889", url="http://mental.jmir.org/2015/1/e8/", url="http://www.ncbi.nlm.nih.gov/pubmed/26543914" } @Article{info:doi/10.2196/jmir.3398, author="Bischof-Kastner, Christina and Kuntsche, Emmanuel and Wolstein, J{\"o}rg", title="Identifying Problematic Internet Users: Development and Validation of the Internet Motive Questionnaire for Adolescents (IMQ-A)", journal="J Med Internet Res", year="2014", month="Oct", day="09", volume="16", number="10", pages="e230", keywords="Internet", keywords="adolescents", keywords="questionnaires", keywords="validation", keywords="addictive behavior", keywords="statistical factor analysis", abstract="Background: Internationally, up to 15.1\% of intensive Internet use among adolescents is dysfunctional. To provide a basis for early intervention and preventive measures, understanding the motives behind intensive Internet use is important. Objective: This study aims to develop a questionnaire, the Internet Motive Questionnaire for Adolescents (IMQ-A), as a theory-based measurement for identifying the underlying motives for high-risk Internet use. More precisely, the aim was to confirm the 4-factor structure (ie, social, enhancement, coping, and conformity motives) as well as its construct and concurrent validity. Another aim was to identify the motivational differences between high-risk and low-risk Internet users. Methods: A sample of 101 German adolescents (female: 52.5\%, 53/101; age: mean 15.9, SD 1.3 years) was recruited. High-risk users (n=47) and low-risk users (n=54) were identified based on a screening measure for online addiction behavior in children and adolescents (Online-Suchtverhalten-Skala, OSVK-S). Here, ``high-risk'' Internet use means use that exceeds the level of intensive Internet use (OSVK-S sum score $\geq$7). Results: The confirmatory factor analysis confirmed the IMQ-A's 4-factor structure. A reliability analysis revealed good internal consistencies of the subscales (.71 up to .86). 
Moreover, regression analyses confirmed that the enhancement and coping motive groups significantly predicted high-risk Internet consumption and the OSVK-S sum score. A mixed-model ANOVA confirmed that adolescents mainly access the Internet for social motives, followed by enhancement and coping motives, and that high-risk users access the Internet more frequently for coping and enhancement motives than low-risk users. Low-risk users were primarily motivated socially. Conclusions: The IMQ-A enables the assessment of motives related to adolescent Internet use and thus the identification of populations at risk. The questionnaire enables the development of preventive measures or early intervention programs, especially dealing with internal motives of Internet consumption. ", doi="10.2196/jmir.3398", url="http://www.jmir.org/2014/10/e230/", url="http://www.ncbi.nlm.nih.gov/pubmed/25299174" } @Article{info:doi/10.2196/jmir.3511, author="De Beurs, Paul Derek and de Vries, LM Anton and de Groot, H. Marieke and de Keijser, Jos and Kerkhof, JFM Ad", title="Applying Computer Adaptive Testing to Optimize Online Assessment of Suicidal Behavior: A Simulation Study", journal="J Med Internet Res", year="2014", month="Sep", day="11", volume="16", number="9", pages="e207", keywords="suicide", keywords="psychometrics", keywords="computing methodologies", keywords="Internet", keywords="suicidal ideation", keywords="risk assessment", abstract="Background: The Internet is used increasingly for both suicide research and prevention. To optimize online assessment of suicidal patients, there is a need for short, good-quality tools to assess elevated risk of future suicidal behavior. Computer adaptive testing (CAT) can be used to reduce response burden and improve accuracy, and make the available pencil-and-paper tools more appropriate for online administration. 
Objective: The aim was to test whether an item response--based computer adaptive simulation can be used to reduce the length of the Beck Scale for Suicide Ideation (BSS). Methods: The data used for our simulation was obtained from a large multicenter trial from The Netherlands: the Professionals in Training to STOP suicide (PITSTOP suicide) study. We applied a principal components analysis (PCA), confirmatory factor analysis (CFA), a graded response model (GRM), and simulated a CAT. Results: The scores of 505 patients were analyzed. Psychometric analyses showed the questionnaire to be unidimensional with good internal consistency. The computer adaptive simulation showed that for the estimation of elevation of risk of future suicidal behavior 4 items (instead of the full 19) were sufficient, on average. Conclusions: This study demonstrated that CAT can be applied successfully to reduce the length of the Dutch version of the BSS. We argue that the use of CAT can improve the accuracy and the response burden when assessing the risk of future suicidal behavior online. Because CAT can be daunting for clinicians and applied scientists, we offer a concrete example of our computer adaptive simulation of the Dutch version of the BSS at the end of the paper. 
", doi="10.2196/jmir.3511", url="http://www.jmir.org/2014/9/e207/", url="http://www.ncbi.nlm.nih.gov/pubmed/25213259" } @Article{info:doi/10.2196/jmir.2818, author="Hedman, Erik and Lj{\'o}tsson, Brj{\'a}nn and Blom, Kerstin and El Alaoui, Samir and Kraepelien, Martin and R{\"u}ck, Christian and Andersson, Gerhard and Svanborg, Cecilia and Lindefors, Nils and Kaldo, Viktor", title="Telephone Versus Internet Administration of Self-Report Measures of Social Anxiety, Depressive Symptoms, and Insomnia: Psychometric Evaluation of a Method to Reduce the Impact of Missing Data", journal="J Med Internet Res", year="2013", month="Oct", day="18", volume="15", number="10", pages="e229", keywords="Internet", keywords="telephone", keywords="self-report measures", keywords="missing data", keywords="method validation", abstract="Background: Internet-administered self-report measures of social anxiety, depressive symptoms, and sleep difficulties are widely used in clinical trials and in clinical routine care, but data loss is a common problem that could render skewed estimates of symptom levels and treatment effects. One way of reducing the negative impact of missing data could be to use telephone administration of self-report measures as a means to complete the data missing from the online data collection. Objective: The aim of the study was to compare the convergence of telephone and Internet administration of self-report measures of social anxiety, depressive symptoms, and sleep difficulties. Methods: The Liebowitz Social Anxiety Scale-Self-Report (LSAS-SR), Montgomery-{\AA}sberg Depression Rating Scale-Self-Rated (MADRS-S), and the Insomnia Severity Index (ISI) were administered over the telephone and via the Internet to a clinical sample (N=82) of psychiatric patients at a clinic specializing in Internet-delivered treatment. Shortened versions of the LSAS-SR and the ISI were used when administered via telephone. 
Results: As predicted, the results showed that the estimates produced by the two administration formats were highly correlated (r=.82-.91; P<.001) and internal consistencies were high in both administration formats (telephone: Cronbach alpha=.76-.86 and Internet: Cronbach alpha=.79-.93). The correlation coefficients were similar across questionnaires and the shorter versions of the questionnaires used in the telephone administration of the LSAS-SR and ISI performed in general equally well compared to when the full scale was used, as was the case with the MADRS-S. Conclusions: Telephone administration of self-report questionnaires is a valid method that can be used to reduce data loss in routine psychiatric practice as well as in clinical trials, thereby contributing to more accurate symptom estimates. ", doi="10.2196/jmir.2818", url="http://www.jmir.org/2013/10/e229/", url="http://www.ncbi.nlm.nih.gov/pubmed/24140566" } @Article{info:doi/10.2196/jmir.2935, author="Faraci, Palmira and Craparo, Giuseppe and Messina, Roberta and Severino, Sergio", title="Internet Addiction Test (IAT): Which is the Best Factorial Solution?", journal="J Med Internet Res", year="2013", month="Oct", day="09", volume="15", number="10", pages="e225", keywords="IAT", keywords="Internet", keywords="addiction", keywords="factorial structure", keywords="psychometric properties", keywords="structural validity", abstract="Background: The Internet Addiction Test (IAT) by Kimberly Young is one of the most utilized diagnostic instruments for Internet addiction. Although many studies have documented psychometric properties of the IAT, consensus on the optimal overall structure of the instrument has yet to emerge since previous analyses yielded markedly different factor analytic results. Objective: The objective of this study was to evaluate the psychometric properties of the Italian version of the IAT, specifically testing the factor structure stability across cultures. 
Methods: In order to determine the dimensional structure underlying the questionnaire, both exploratory and confirmatory factor analyses were performed. The reliability of the questionnaire was computed by the Cronbach alpha coefficient. Results: Data analyses were conducted on a sample of 485 college students (32.3\%, 157/485 males and 67.7\%, 328/485 females) with a mean age of 24.05 years (SD 7.3, range 17-47). Results showed 176/485 (36.3\%) participants with IAT score from 40 to 69, revealing excessive Internet use, and 11/485 (1.9\%) participants with IAT score from 70 to 100, suggesting significant problems because of Internet use. The IAT Italian version showed good psychometric properties, in terms of internal consistency and factorial validity. Alpha values were satisfactory for both the one-factor solution (Cronbach alpha=.91), and the two-factor solution (Cronbach alpha=.88 and Cronbach alpha=.79). The one-factor solution comprised 20 items, explaining 36.18\% of the variance. The two-factor solution, accounting for 42.15\% of the variance, showed 11 items loading on Factor 1 (Emotional and Cognitive Preoccupation with the Internet) and 7 items on Factor 2 (Loss of Control and Interference with Daily Life). Goodness-of-fit indexes (NNFI: Non-Normed Fit Index; CFI: Comparative Fit Index; RMSEA: Root Mean Square Error of Approximation; SRMR: Standardized Root Mean Square Residual) from confirmatory factor analyses conducted on a random half subsample of participants (n=243) were satisfactory in both factorial solutions: two-factor model ($\chi$2132= 354.17, P<.001, $\chi$2/df=2.68, NNFI=.99, CFI=.99, RMSEA=.02 [90\% CI 0.000-0.038], and SRMR=.07), and one-factor model ($\chi$2169=483.79, P<.001, $\chi$2/df=2.86, NNFI=.98, CFI=.99, RMSEA=.02 [90\% CI 0.000-0.039], and SRMR=.07). Conclusions: Our study was aimed at determining the most parsimonious and veridical representation of the structure of Internet addiction as measured by the IAT. 
Based on our findings, support was provided for both single and two-factor models, with slightly strong support for the bidimensionality of the instrument. Given the inconsistency of the factor analytic literature of the IAT, researchers should exercise caution when using the instrument, dividing the scale into factors or subscales. Additional research examining the cross-cultural stability of factor solutions is still needed. ", doi="10.2196/jmir.2935", url="http://www.jmir.org/2013/10/e225/", url="http://www.ncbi.nlm.nih.gov/pubmed/24184961" }