<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="review-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Ment Health</journal-id>
      <journal-title>JMIR Mental Health</journal-title>
      <issn pub-type="epub">2368-7959</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v13i1e88057</article-id>
      <article-id pub-id-type="pmid">42139691</article-id>
      <article-id pub-id-type="doi">10.2196/88057</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Review</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Review</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Large Language Models and Their Applications in Mental Health: Scoping Review</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Torous</surname>
            <given-names>John</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Okolie</surname>
            <given-names>Awele</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Bhat</surname>
            <given-names>Venkat</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kurmashev</surname>
            <given-names>Ruslan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Emekli</surname>
            <given-names>Esra</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lokadjaja</surname>
            <given-names>Matheus Calvin</given-names>
          </name>
          <degrees>B Eng</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0005-9608-5882</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Kho</surname>
            <given-names>Jordon Junyang</given-names>
          </name>
          <degrees>B Eng</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-9620-3649</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Schulz</surname>
            <given-names>Peter Johannes</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4281-489X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Goh</surname>
            <given-names>Wilson Wen Bin</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Lee Kong Chian School of Medicine</institution>
            <institution>Nanyang Technological University</institution>
            <addr-line>59 Nanyang Drive</addr-line>
            <addr-line>Singapore, 636921</addr-line>
            <country>Singapore</country>
            <phone>65 65927871</phone>
            <email>wilsongoh@ntu.edu.sg</email>
          </address>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <xref rid="aff8" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3863-7501</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Lee Kong Chian School of Medicine</institution>
        <institution>Nanyang Technological University</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Wee Kim Wee School of Communication and Information</institution>
        <institution>Nanyang Technological University</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Communication &#38; Media</institution>
        <institution>Ewha Womans University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Center of AI in Medicine</institution>
        <institution>Nanyang Technological University</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Center for Biomedical Informatics</institution>
        <institution>Nanyang Technological University</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>School of Biological Sciences</institution>
        <institution>Nanyang Technological University</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Division of Neurology, Department of Brain Sciences</institution>
        <institution>Faculty of Medicine</institution>
        <institution>Imperial College London</institution>
        <addr-line>London, England</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Institute of Mental Health</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Wilson Wen Bin Goh <email>wilsongoh@ntu.edu.sg</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>5</month>
        <year>2026</year>
      </pub-date>
      <volume>13</volume>
      <elocation-id>e88057</elocation-id>
      <history>
        <date date-type="received">
          <day>18</day>
          <month>11</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>23</day>
          <month>12</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>15</day>
          <month>2</month>
          <year>2026</year>
        </date>
        <date date-type="accepted">
          <day>16</day>
          <month>2</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Matheus Calvin Lokadjaja, Jordon Junyang Kho, Peter Johannes Schulz, Wilson Wen Bin Goh. Originally published in JMIR Mental Health (https://mental.jmir.org), 15.05.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Mental Health, is properly cited. The complete bibliographic information, a link to the original publication on https://mental.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://mental.jmir.org/2026/1/e88057" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Large language models (LLMs) are poised to transform mental health care, offering advanced capabilities in diagnosis, prognosis, and decision support. Since their inception, numerous mental health-focused LLMs have emerged in the scientific literature, reflecting the growing interest in leveraging these models across various clinical applications. With a broad range of models available, diverse optimization strategies, and multiple use cases, reviewing the current landscape is critical to understanding where future impact lies.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to conduct a scoping review investigating the use of LLMs in mental health across diagnostic, prognostic, and decision support tasks.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We screened 3121 papers from PubMed, Scopus, and Web of Science for studies published between January 2023 and October 2025, using terms related to LLM and mental health. After removing duplicates, 2 reviewers (MCL and WWBG) independently screened the studies, with a third (JJK) to resolve conflicting opinions. We extracted and synthesized information on the models, use cases, datasets, and adaptation methods from selected papers.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In total, 41 papers were selected. Many studies included evaluations on OpenAI’s GPT series applications: GPT-4 (24 studies, 58.5%) and GPT-3.5 (16 studies, 39%). Others included Bidirectional Encoder Representations from Transformers-derived models (9 studies, 22%), LLaMA (8 studies, 19.5%), and RoBERTa-derived models (6 studies, 14.6%). While all studies initially applied out-of-the-box LLMs, several adapted them through few-shot learning or fine-tuning to better align with specific research goals. The most common use case was in diagnostics (31 studies, 75.6%), while the most common target condition was depression (11 studies, 26.8%). While many studies reported superior performance of LLMs, only a minority of studies (13 studies, 31.7%) validated LLM performance against clinician assessments using real patient data, with the majority relying on proxy outcomes such as clinical vignettes, examination questions, or social media posts.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Despite rapid growth and diversity of LLM applications in mental health, the field remains nascent and exploratory. Future developments must emphasize consistent model adaptation procedures to ensure safety and clinical workflow alignment. Models must also be evaluated against robust criteria by using standardized protocols and real clinical outcome measures.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>generative AI</kwd>
        <kwd>large language models</kwd>
        <kwd>mental health</kwd>
        <kwd>natural language processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The advent of large language models (LLMs) represents a transformative shift in mental health care, offering novel opportunities for diagnosis, prognosis, and decision support. Since the inception of ChatGPT (OpenAI) in November 2022, the field has witnessed rapid advancements with the emergence of specialized, fine-tuned models tailored for health care [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. These developments have sparked significant interest in leveraging LLMs to address longstanding challenges in mental health, such as improving access to care, enhancing diagnostic precision, and personalizing treatment strategies [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>Early implementations of general-purpose LLMs demonstrated promising potential in mental health-related tasks, including conversational support and preliminary assessments [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. Riding on this momentum, researchers have developed domain-specific applications such as MentalBERT, MentaLLaMA, and Mental-LLM, which are fine-tuned from out-of-the-box models using mental health-specific datasets, social media posts, and therapeutic dialogues [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. These efforts aim to inject expert clinical knowledge into LLMs, enabling more nuanced and accurate responses tailored to psychiatric contexts. Additionally, innovative techniques like instruction fine-tuning and chain-of-empathy prompting have been introduced to enhance the reasoning and interaction capabilities of these models, especially in complex emotional and diagnostic scenarios [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>].</p>
      <p>Similar to other clinical domains, the potential applications of LLMs in mental health are vast and varied. These include assisting clinicians with diagnostic reasoning, predicting disease progression, and even providing direct patient-facing services such as emotional support or psychoeducation [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. Furthermore, LLMs could streamline administrative processes in mental health settings by summarizing therapy sessions or generating treatment plans based on evidence-based practices [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. These capabilities hold promise for addressing barriers to mental health care access, particularly for underserved populations [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      <p>Despite these advancements, a significant translational gap exists between research and real-world clinical deployment. Many LLMs are tested on simulated scenarios, vignettes, medical exams, and social media-derived datasets, raising concerns about their reliability and validity in real-world contexts (eg, a model trained on cleaned, idealized data may not function on real-world data, which may contain noise and biases). Furthermore, the implementation pathway and concomitant ethical implications of deploying LLMs in high-stakes environments like psychiatry warrant additional care informed by clear research evidence. Issues such as data privacy, bias in model outputs, and the risk of misinterpretation must be encapsulated into model development, training, and implementation processes. It is unclear how far the field has progressed while accounting for these critical considerations.</p>
      <p>In this scoping review, we assess recent technical developments and the real-world applicability of LLMs in mental health care. We examine model types, tuning methodologies, use case diversity, and evaluation criteria to uncover key trends and highlight approaches that address practical deployment challenges. By identifying where progress is being made, we clarify the fruits of current efforts while also highlighting unresolved conflicts and gaps. We hope this can help AI developers think more critically about how to create more clinically meaningful and ethically sound applications. Simultaneously, applied clinical researchers can use this to raise awareness of various conflicts and synergies based on use case, to help them prioritize deployment testing areas and to implement critical checks, ensuring patient safety and outcomes are prioritized.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Literature Extraction</title>
        <p>We conducted literature extraction on PubMed, Web of Science, and Scopus by using a comprehensive battery of search terms on November 4, 2025. To avoid duplication of effort, one author (MCL) performed the initial literature extraction following search term consensus among all authors.</p>
        <p>The search strategy used a comprehensive set of keywords related to psychiatry and mental health, in combination with terms related to large language models (LLMs). Psychiatry-related terms included “mental health” and “psychiatry,” while LLM-related terms included “large language model” and “LLM.” Specific model names, such as “Gemini,” “GPT,” “Llama,” “Claude,” and “Deepseek,” were added to broaden and refine the search results. Search terms were combined using Boolean operators (AND/OR) and were applied to titles, abstracts, and topic and keyword fields. Further details on the specific search strategies and a full list of search terms are shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>Studies were eligible if their first online publication date fell between January 1, 2023, and October 31, 2025. For articles published “online ahead of print,” the electronic publication (Epub) date indexed in the database was used to determine eligibility rather than the later print issue date. This approach ensured consistent application of the predefined search window and avoided exclusion of studies that were available online within the eligibility period but assigned to a later print issue. One article [<xref ref-type="bibr" rid="ref23">23</xref>], published online ahead of print on July 10, 2025, was identified during citation checking following the primary search and met the inclusion criteria.</p>
        <p>We included peer-reviewed, nonreview articles (including online-ahead-of-print) published in English without forward and backward citation search. Preprints and unpublished manuscripts were excluded due to a lack of peer-review quality assurance. To maintain focus on real-world applications, we excluded studies that used or generated synthetic data, given the limited confidence in synthetic data within medical research. We also excluded studies where relevance to psychiatry or mental health was peripheral (eg, studies in ethical AI where mental health was only mentioned in passing). Eligible studies had to explicitly state that they covered both LLMs and psychiatry or mental health applications.</p>
      </sec>
      <sec>
        <title>Included Studies</title>
        <p>Two independent authors (WWBG and MCL) screened the studies. When there was a difference in opinion, a third independent author (JJK) helped to resolve the difference. A fourth author (PJS) acted as an independent methodological assessor to validate processes and workflows. From search parameters, we obtained a corpus of 3121 papers derived from PubMed, Scopus, and Web of Science.</p>
        <p>From these 3121 papers, 993 were eliminated as duplicates. To focus the review on higher-impact and well-indexed outlets, we excluded 1601 papers published in journals not ranked within the top quartile of their respective subject categories according to our university’s journal tier list, which was compiled using bibliometric data from Journal Citation Reports and Scopus (via SCImago Journal Rank). This step was taken to ensure a focus on studies meeting established bibliometric quality thresholds, rather than as a proxy for methodological rigor. Of the remaining 527 papers, 486 were removed because they did not meet the inclusion criteria. We identified 41 studies [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref59">59</xref>] of interest (see <xref ref-type="table" rid="table1">Table 1</xref> for a summary of the articles).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Summary of the included studies.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="90"/>
            <col width="80"/>
            <col width="70"/>
            <col width="90"/>
            <col width="90"/>
            <col width="70"/>
            <col width="100"/>
            <col width="90"/>
            <col width="110"/>
            <col width="110"/>
            <col width="100"/>
            <thead>
              <tr valign="bottom">
                <td>Author</td>
                <td>Year</td>
                <td>Use case</td>
                <td>Target condition</td>
                <td>Dataset type</td>
                <td>Modalities</td>
                <td>LLM<sup>a</sup> used</td>
                <td>LLM type</td>
                <td>Fine-tuning status</td>
                <td>Knowledge augmentation</td>
                <td>Country</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Schubert et al [<xref ref-type="bibr" rid="ref24">24</xref>]</td>
                <td>2023</td>
                <td>Diagnosis</td>
                <td>General mental health</td>
                <td>Medical examination</td>
                <td>Text</td>
                <td>GPT-4; GPT-3.5</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—<sup>b</sup></td>
                <td>Germany</td>
              </tr>
              <tr valign="top">
                <td>Watari et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td>
                <td>2023</td>
                <td>Diagnosis</td>
                <td>General mental health</td>
                <td>Medical examination</td>
                <td>Text</td>
                <td>GPT-4</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Japan</td>
              </tr>
              <tr valign="top">
                <td>Rojas et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>General mental health</td>
                <td>Medical examination</td>
                <td>Text; Visual</td>
                <td>GPT-3.5; GPT-4; GPT-4 with Vision</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Li et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>General mental health</td>
                <td>Medical examination</td>
                <td>Text</td>
                <td>GPT-4; Bard; LLaMA-2</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Taiwan</td>
              </tr>
              <tr valign="top">
                <td>Herrmann-Werner et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>General mental health</td>
                <td>Medical examination</td>
                <td>Text</td>
                <td>GPT-4</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Germany</td>
              </tr>
              <tr valign="top">
                <td>Kim et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>General mental health</td>
                <td>Medical examination</td>
                <td>Text; Visual</td>
                <td>Claude 3.5 Sonnet; Gemini 1.5 Pro; GPT-4o</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>South Korea</td>
              </tr>
              <tr valign="top">
                <td>Levkovich and Elyoseph [<xref ref-type="bibr" rid="ref32">32</xref>]</td>
                <td>2023</td>
                <td>Diagnosis</td>
                <td>Suicide</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>GPT-4; GPT-3.5</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Israel</td>
              </tr>
              <tr valign="top">
                <td>Levkovich and Elyoseph [<xref ref-type="bibr" rid="ref17">17</xref>]</td>
                <td>2023</td>
                <td>Diagnosis</td>
                <td>Depression</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>GPT-3.5; GPT-4</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Israel</td>
              </tr>
              <tr valign="top">
                <td>Gargari et al [<xref ref-type="bibr" rid="ref31">31</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Suicide</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>Aya; GPT-3.5; GPT-4; GPT-3.5 Clinical Assistant (CA); Nemotron; Nemotron CA</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>RAG<sup>c</sup></td>
                <td>Iran</td>
              </tr>
              <tr valign="top">
                <td>Kim et al [<xref ref-type="bibr" rid="ref33">33</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Obsessive-compulsive disorder</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>GPT-4; LLaMA-3; Gemini-Pro</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Choi et al [<xref ref-type="bibr" rid="ref35">35</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Delirium</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>GPT-3.5; GPT-4</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Wislocki et al [<xref ref-type="bibr" rid="ref34">34</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>Trauma</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>Gemini 1.5 Flash; GPT-4o mini; Claude Sonnet; LLaMA-3</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Ohse et al [<xref ref-type="bibr" rid="ref36">36</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Depression</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>GPT-4; GPT-3.5; Llama2-13B; BERT</td>
                <td>Encoder-only; Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>Germany</td>
              </tr>
              <tr valign="top">
                <td>Ghosh et al [<xref ref-type="bibr" rid="ref43">43</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Depression</td>
                <td>Clinical dataset</td>
                <td>Text; Audio; Visual</td>
                <td>BERT</td>
                <td>Encoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>Australia</td>
              </tr>
              <tr valign="top">
                <td>Sadeghi et al [<xref ref-type="bibr" rid="ref50">50</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Depression</td>
                <td>Clinical dataset</td>
                <td>Text; Audio; Visual</td>
                <td>GPT-3.5; DepRoBERTa</td>
                <td>Encoder-only; Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>Germany</td>
              </tr>
              <tr valign="top">
                <td>Arslan et al [<xref ref-type="bibr" rid="ref52">52</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Schizophrenia-spectrum disorders</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>SBERT</td>
                <td>Encoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Turkey</td>
              </tr>
              <tr valign="top">
                <td>Shi et al [<xref ref-type="bibr" rid="ref38">38</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>Obsessive-compulsive disorder; Trauma</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>MentaLLaMA; MentalQLM; GPT-4</td>
                <td>Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>China</td>
              </tr>
              <tr valign="top">
                <td>Palominos et al [<xref ref-type="bibr" rid="ref49">49</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>Schizophrenia</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>BERT</td>
                <td>Encoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Spain</td>
              </tr>
              <tr valign="top">
                <td>Leng et al [<xref ref-type="bibr" rid="ref41">41</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>Cognitive impairment</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>GPT-4o-mini</td>
                <td>Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Shin et al [<xref ref-type="bibr" rid="ref37">37</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Depression; suicide risk</td>
                <td>Personal data</td>
                <td>Text</td>
                <td>GPT-3.5; GPT-4</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>South Korea</td>
              </tr>
              <tr valign="top">
                <td>van Buchem et al [<xref ref-type="bibr" rid="ref40">40</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Depression</td>
                <td>Personal data</td>
                <td>Text</td>
                <td>BERT; RedditBERT</td>
                <td>Encoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Netherlands</td>
              </tr>
              <tr valign="top">
                <td>Bartal et al [<xref ref-type="bibr" rid="ref51">51</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Posttraumatic stress disorder</td>
                <td>Personal data</td>
                <td>Text</td>
                <td>GPT-3.5</td>
                <td>Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Thomas et al [<xref ref-type="bibr" rid="ref46">46</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>Suicide ideation and advanced suicidal engagement</td>
                <td>Personal data</td>
                <td>Text</td>
                <td>XLM-RoBERTa-base</td>
                <td>Encoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Switzerland</td>
              </tr>
              <tr valign="top">
                <td>Chung et al [<xref ref-type="bibr" rid="ref39">39</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>Depression</td>
                <td>Personal data</td>
                <td>Text</td>
                <td>BERT; BERTopic</td>
                <td>Encoder-only; Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>South Korea</td>
              </tr>
              <tr valign="top">
                <td>Xu et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>General mental health</td>
                <td>Social media</td>
                <td>Text</td>
                <td>Mental-Alpaca; Mental-RoBERTa; Mental-Flan-T5; Flan-T5; Alpaca; BERT; Llama-2; GPT-3.5; GPT-4</td>
                <td>Encoder-only; Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Dalal et al [<xref ref-type="bibr" rid="ref48">48</xref>]</td>
                <td>2024</td>
                <td>Diagnosis</td>
                <td>Depression</td>
                <td>Social media</td>
                <td>Text</td>
                <td>LongFormer; RoBERTa; BERT; ERNIEv2; MentalBERT; PsychBERT; ClinicalT5; MentalT5; MentalBART; MentaLLAMA</td>
                <td>Encoder-only; Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>India</td>
              </tr>
              <tr valign="top">
                <td>Bouktif et al [<xref ref-type="bibr" rid="ref47">47</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>Suicide ideation</td>
                <td>Social media</td>
                <td>Text</td>
                <td>BERT</td>
                <td>Encoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>UAE</td>
              </tr>
              <tr valign="top">
                <td>Esmi et al [<xref ref-type="bibr" rid="ref42">42</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>Stress</td>
                <td>Social media</td>
                <td>Text</td>
                <td>GPT-4</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Netherlands</td>
              </tr>
              <tr valign="top">
                <td>Kallstenius et al [<xref ref-type="bibr" rid="ref45">45</xref>]</td>
                <td>2025</td>
                <td>Diagnosis</td>
                <td>General mental health</td>
                <td>Social media</td>
                <td>Text</td>
                <td>GPT-4o-mini</td>
                <td>Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>Sweden</td>
              </tr>
              <tr valign="top">
                <td>Elyoseph et al [<xref ref-type="bibr" rid="ref53">53</xref>]</td>
                <td>2024</td>
                <td>Prognosis</td>
                <td>Depression</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>GPT-3.5; GPT-4; Bard; Claude</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Israel</td>
              </tr>
              <tr valign="top">
                <td>Elyoseph and Levkovich [<xref ref-type="bibr" rid="ref54">54</xref>]</td>
                <td>2024</td>
                <td>Prognosis</td>
                <td>Schizophrenia</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>GPT-3.5; GPT-4; Bard; Claude</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Israel</td>
              </tr>
              <tr valign="top">
                <td>Lee et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td>
                <td>2024</td>
                <td>Prognosis</td>
                <td>Suicide ideation</td>
                <td>Personal data</td>
                <td>Text</td>
                <td>GPT-4</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Perlis et al [<xref ref-type="bibr" rid="ref59">59</xref>]</td>
                <td>2024</td>
                <td>Decision support</td>
                <td>Bipolar depression</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>GPT-4 Turbo</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>RAG</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Adhikary et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td>
                <td>2024</td>
                <td>Decision support</td>
                <td>General mental health</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>BART; T5; GPT-2; GPT-Neo; GPT-J; Flan-T5; Mistral; MentalBART; MentalLlama; Llama-2; Phi-2</td>
                <td>Encoder-only; Decoder-only; Encoder-Decoder</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>India</td>
              </tr>
              <tr valign="top">
                <td>So et al [<xref ref-type="bibr" rid="ref55">55</xref>]</td>
                <td>2024</td>
                <td>Decision support</td>
                <td>Posttraumatic stress disorder</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>GPT-3.5 Turbo; GPT-4 Turbo</td>
                <td>Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>South Korea</td>
              </tr>
              <tr valign="top">
                <td>Taylor et al [<xref ref-type="bibr" rid="ref58">58</xref>]</td>
                <td>2024</td>
                <td>Decision support</td>
                <td>General mental health</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>RoBERTa-base; RoBERTa-base-OHFT; Clinical Longformer</td>
                <td>Encoder-only; Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>UK</td>
              </tr>
              <tr valign="top">
                <td>Mahbub et al [<xref ref-type="bibr" rid="ref56">56</xref>]</td>
                <td>2025</td>
                <td>Decision support</td>
                <td>Substance use disorder</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>Flan-T5</td>
                <td>Encoder-Decoder</td>
                <td>Untuned</td>
                <td>—</td>
                <td>USA</td>
              </tr>
              <tr valign="top">
                <td>Chen et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td>
                <td>2025</td>
                <td>Decision support</td>
                <td>General mental health</td>
                <td>Clinical dataset</td>
                <td>Text</td>
                <td>Deepseek R1 Dis-Qwen; Internlm2.5; opt model; gpt-sw3 model; Qwen model</td>
                <td>Decoder-only</td>
                <td>Fine-tuned; Untuned</td>
                <td>—</td>
                <td>China</td>
              </tr>
              <tr valign="top">
                <td>Liu et al [<xref ref-type="bibr" rid="ref57">57</xref>]</td>
                <td>2025</td>
                <td>Decision support</td>
                <td>Schizophrenia</td>
                <td>Clinical dataset</td>
                <td>Text; Audio; Visual</td>
                <td>Claude 3 Haiku; Gemini 1.0 Pro; GPT-3.5 Turbo</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Taiwan</td>
              </tr>
              <tr valign="top">
                <td>D'Souza et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td>
                <td>2023</td>
                <td>Diagnosis; Prognosis</td>
                <td>Psychiatry</td>
                <td>Vignettes</td>
                <td>Text</td>
                <td>GPT-3.5</td>
                <td>Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>India</td>
              </tr>
              <tr valign="top">
                <td>Abdullah and Negied [<xref ref-type="bibr" rid="ref44">44</xref>]</td>
                <td>2024</td>
                <td>Diagnosis; Prognosis</td>
                <td>ADHD; anxiety; bipolar; depression</td>
                <td>Social media</td>
                <td>Text</td>
                <td>BERT; RoBERTa; OpenAI GPT; GPT 2</td>
                <td>Encoder-only; Decoder-only</td>
                <td>Untuned</td>
                <td>—</td>
                <td>Egypt</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>LLM: large language model.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Not available.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>RAG: retrieval-augmented generation.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Use Case Stratification</title>
        <p>Selected studies were divided into three main use cases based on explicit examination of their objectives: diagnosis, prognosis, and decision support. Diagnostic studies leverage LLM capabilities to detect mental health conditions directly from text and evaluate diagnostic performance. Prognostic studies focus on predicting future mental health outcomes with LLMs. Decision support studies examine various LLM-based approaches to assist clinicians in making informed decisions about patient care, such as providing advice on treatment protocols and evidence search.</p>
        <p>Each study was assigned to one or more use-case categories. Studies addressing multiple use cases with comparable emphasis (eg, both diagnostic classification and prognostic prediction) were assigned to each relevant category. These studies will be discussed in their respective subsections to ensure that their contributions to each use case are fully captured in the analysis. Consequently, studies were intentionally counted in all applicable categories, and percentage calculations were based on the total number of included studies (N=41) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref59">59</xref>], with the understanding that category percentages are not mutually exclusive and may exceed 100% when summed. Coding decisions were made independently by 2 reviewers (MCL and WWBG), with disagreements resolved through discussion until consensus was reached.</p>
      </sec>
      <sec>
        <title>Data Extraction</title>
        <p>For each study, 2 authors (WWBG and MCL) performed full-text extraction and coded the articles. We extracted the following information from the full text: study information (author’s name, year of publication, and country of study), the LLM use case and the role of the LLM, the target condition, the type of dataset and its modalities, LLM type and configuration, tuning status, and presence of knowledge augmentation. As there were many types of datasets across studies, we defined five types of datasets: medical examinations, vignettes, clinical datasets, personal data, and social media posts. During the extraction process, when there was a difference in opinion, a third independent author (JJK) helped resolve the difference. To ensure that the protocol was strictly adhered to and was in line with best practices, we reviewed the processes with an independent senior author (PS).</p>
        <p>Medical examination data were defined as standardized examination materials that include an explicit psychiatric or mental health component. While most studies in this category did not focus exclusively on actual mental health applications, they were included if the evaluated task involved mental health–relevant reasoning or assessment. Given the limited number of studies examining direct LLM applications in mental health, inclusion of such studies aligns with the objective of a scoping review to map the breadth of existing evidence. Moreover, the inclusion of these studies enables comparison of LLM performance across different medical subfields.</p>
        <p>Vignette data are compiled from case narratives presenting clinical scenarios on mental health conditions. Clinical datasets contain real-world patient data recorded by health care professionals (eg, electronic health records [EHRs] and clinical notes). Personal data are directly collected from individuals (eg, diary entries and messages). Social media posts are sourced from social media platforms (eg, Reddit and Facebook). We categorized the LLM tuning status into untuned and fine-tuned. Untuned LLMs are out-of-the-box models not trained in any specific domain. Fine-tuned LLMs are models that have been trained for a mental health-specific task. The country of study was recorded as the country of the corresponding author.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Executive Summary</title>
        <p>Broadly, most studies used applications based on OpenAI’s GPT series: GPT-4 (n=24, 58.5%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref19">19</xref>, <xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref59">59</xref>] and GPT-3.5 (n=16, 39%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref57">57</xref>]. 
Other popular models include Google’s Bidirectional Encoder Representations from Transformers (BERT) and its derivatives (n=9, 22%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref47">47</xref>-<xref ref-type="bibr" rid="ref49">49</xref>], META’s Llama series (n=8, 19.5%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref48">48</xref>], and RoBERTa and its derivatives (n=6, 14.6%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. 
The most used LLM type is decoder-only model (n=34, 82.9%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref39">39</xref>, <xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref57">57</xref>-<xref ref-type="bibr" rid="ref59">59</xref>], followed by encoder-only model (n=14, 34.1%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>-<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref58">58</xref>].</p>
        <p>All studies used out-of-the-box models with about a third also evaluating domain-specific fine-tuned derivatives (n=13, 31.7%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. The most common use-case was diagnosis (n=31, 75.6%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref52">52</xref>], followed by decision support (n=7, 17.1%) [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref55">55</xref>-<xref ref-type="bibr" rid="ref59">59</xref>]. Among mental conditions, the most studied was depression (n=11, 26.8%) [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref59">59</xref>]. Other conditions studied include attention deficit hyperactivity disorder, obsessive-compulsive disorder (OCD), and suicidal ideation (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Schematic diagram of the studies in this review.</p>
          </caption>
          <graphic xlink:href="mental_v13i1e88057_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Diagnosis Is the Most Common Use Case for LLMs in Mental Health</title>
        <sec>
          <title>Overview</title>
          <p>We divided use cases into three categories: diagnosis, prognosis, and decision support. Diagnosis involves identifying mental health conditions, while prognosis predicts their likely course and outcome. Decision support encompasses tools that help clinicians and patients make informed choices about care. Of the selected studies, most focused on diagnosis where the LLM, usually untuned, was used to diagnose a mental health disorder directly from input (n=31, 75.6%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref52">52</xref>].</p>
        </sec>
        <sec>
          <title>Diagnostic Evaluations on Medical Examinations</title>
          <p>Of 31 relevant studies [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref52">52</xref>], 6 [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref29">29</xref>] evaluated LLMs via performance in medical exams. These controlled assessments evaluate the degree of knowledge retrieval in out-of-the-box GPT models, with the expectation that a pretrained model, knowledgeable in the clinical domain, has potential for clinical deployment. Model performance has improved markedly across GPT iterations: When Schubert et al [<xref ref-type="bibr" rid="ref24">24</xref>] compared GPT-4 and GPT-3.5 on neurology board-style exam questions, GPT-4 performance surpassed the performance of GPT-3.5 and humans in the behavioral, cognitive, and psychological categories. This result was also corroborated by Rojas et al [<xref ref-type="bibr" rid="ref25">25</xref>], who showed that GPT-4 and GPT-4V were superior to GPT-3.5 when taking Chile’s major medical examination, the Examen Único Nacional de Conocimientos de Medicina; on the psychiatric portion, the models performed well.</p>
          <p>Across multimodel comparisons, GPT consistently outperformed alternative LLMs. Li et al [<xref ref-type="bibr" rid="ref26">26</xref>] showed that GPT-4 was able to pass the 2022 Taiwan Psychiatric Licensing Examination, whereas Bard and Llama-2 failed. When tested for differential diagnosis, it was reported that GPT-4 performed close to an experienced psychiatrist [<xref ref-type="bibr" rid="ref26">26</xref>]. Following the trend of testing the ability of LLMs to perform on non-English tests, Kim et al [<xref ref-type="bibr" rid="ref27">27</xref>] showed that GPT-4o, Claude 3.5, and Gemini 1.5 Pro all performed well on the Korean medical licensing examination, with GPT-4o and Claude 3.5 outshining Gemini 1.5 Pro in all categories, including psychiatry.</p>
          <p>However, there are also noteworthy limitations, especially when it comes to hallucinations. Schubert et al [<xref ref-type="bibr" rid="ref24">24</xref>] reported that regardless of GPT version, mistakes and wrong information are presented with complete confidence, especially when challenged with higher-order cognitive type problems [<xref ref-type="bibr" rid="ref24">24</xref>]. When Watari et al [<xref ref-type="bibr" rid="ref28">28</xref>] compared the performance of GPT-4 and the average Japanese medical resident when taking the Japanese General Medicine In-Training Examination, they found that while GPT-4 outperformed the average medical resident, the medical resident outperformed GPT-4 in the category of “psychiatry” [<xref ref-type="bibr" rid="ref28">28</xref>]. Furthermore, compared to other clinical categories, GPT-4 performed worst in “psychiatry,” demonstrating the specific domain challenge pertinent to psychiatry and mental health. Moreover, there is also the issue of black boxes and catastrophic forgetting. Herrmann-Werner et al [<xref ref-type="bibr" rid="ref29">29</xref>] pointed out that although GPT-4 passed the exam with more than 90% accuracy in the evaluation of clinical diagnostics, when GPT-4 was incorrect, the model was unable to “remember” or “understand” the context of the problem [<xref ref-type="bibr" rid="ref29">29</xref>]. Similarly, casting doubt on real-world deployment, Li et al [<xref ref-type="bibr" rid="ref26">26</xref>] also noted that while GPT-4 can pass the Taiwan Psychiatric Licensing Examination, it performs worse than experienced psychiatrists. Such unexpected failures can have important implications regarding patient safety, clinical efficacy, and even liabilities in real-world deployment.</p>
        </sec>
        <sec>
          <title>Diagnostic Evaluations on Vignettes</title>
          <p>Vignettes are another important evaluation scenario, with the key difference being that vignettes take the form of unstructured narratives. Seven selected studies evaluated LLMs on clinical vignettes. Here, the LLMs are evaluated for their ability to apply the knowledge that they learned by extracting relevant information from vignettes to form and justify their diagnosis.</p>
          <p>Here, LLMs generally display good performance. Franco D'Souza et al [<xref ref-type="bibr" rid="ref30">30</xref>] used 100 cases of psychiatry vignettes to evaluate GPT-3.5, reporting exceptional performance, especially in forming management strategies and diagnoses from the scenario. Gargari et al [<xref ref-type="bibr" rid="ref31">31</xref>] also showed that GPT-4, GPT-3.5, and GPT-3.5 with RAG can perform well in diagnostic tasks when presented with 20 clinical vignettes. In some cases, LLMs can perform as well as or better than mental health professionals, as shown by Levkovich and Elyoseph [<xref ref-type="bibr" rid="ref32">32</xref>], who compared GPT-3.5 and GPT-4 with mental health professionals using clinical vignettes related to suicide, and by Kim et al [<xref ref-type="bibr" rid="ref33">33</xref>], who compared GPT-4, LLaMA-3, and Gemini-Pro with mental health professionals using OCD cases from the <italic>DSM-5</italic> (<italic>Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition</italic>) clinical casebook.</p>
          <p>Another key advantage was reported by Levkovich and Elyoseph [<xref ref-type="bibr" rid="ref17">17</xref>], who assessed GPT-3.5 and GPT-4 on depression vignette cases and found that their evaluations were in line with official guidelines. But more than that, they also found that when compared against physicians, LLMs displayed no biases with regard to the gender or socioeconomic status of the patient [<xref ref-type="bibr" rid="ref17">17</xref>]. Similarly, Wislocki et al [<xref ref-type="bibr" rid="ref34">34</xref>] found that the LLMs that they used (Gemini 1.5 Flash, GPT-4o mini, Claude Sonnet, and Meta Llama 3) demonstrated less trauma-related diagnostic overshadowing bias when presented with vignettes about OCD and substance abuse symptoms. This is an important result, seeing as issues of model discrimination and inequity are often raised as reasons for non-adoption.</p>
          <p>However, there are limitations. In Levkovich and Elyoseph’s study with suicide vignettes, GPT-3.5 tended to underestimate the risk of suicide [<xref ref-type="bibr" rid="ref32">32</xref>]. In their work with depression vignette cases, both GPT-3.5 and GPT-4 showed difficulty in assessing vignettes containing forbidden content (sexual violence), which violate rules of use. Choi et al [<xref ref-type="bibr" rid="ref35">35</xref>], while showing that GPT-3.5 and GPT-4 can work well in delirium vignette cases, found that there is difficulty for the models to comprehend complex use cases, especially when tasked to justify their thought processes. Gargari et al [<xref ref-type="bibr" rid="ref31">31</xref>] also cautioned that the models performed worse with specific disorders such as cyclothymic and disruptive mood dysregulation disorders. They also reported that GPT-4 and GPT-3.5 outperformed AYA and Nemotron models, which may highlight a performance difference between open-source and proprietary models.</p>
        </sec>
        <sec>
          <title>Diagnostic Evaluations on Clinical Datasets, Personal Data, and Social Media Posts</title>
          <p>Unlike medical examinations and vignettes, texts from clinical datasets, personal data, and social media posts tend to be written in a freestyle manner (ie, no specific formatting) and contain nonclinical terms. We found 18 studies [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref52">52</xref>] evaluating LLMs on these data types and exploring novel frameworks to enhance diagnostic performance and clinical explainability.</p>
          <p>Most studies reported that fine-tuning is crucial for higher diagnostic performance. Ohse et al [<xref ref-type="bibr" rid="ref36">36</xref>] compared out-of-the-box GPT-4, GPT-3.5, Llama2-13B, and BERT in diagnosing depression from clinical patient interviews. They reported that while GPT-4 was the best-performing untuned model, it did not outperform fine-tuned GPT-3.5. Separately, Shin et al [<xref ref-type="bibr" rid="ref37">37</xref>] reported that fine-tuned GPT-3.5 performed best in diagnosing depression and suicide risk from diary texts as compared to untuned GPT-4 and GPT-3.5. Similarly, Xu et al [<xref ref-type="bibr" rid="ref12">12</xref>] and Shi et al [<xref ref-type="bibr" rid="ref38">38</xref>] demonstrated that fine-tuned LLMs outperformed out-of-the-box LLMs in detecting various mental health conditions from Reddit, Twitter, and other social media posts. Chung et al [<xref ref-type="bibr" rid="ref39">39</xref>] also found that a BERT model using the Patient Health Questionnaire-9 (PHQ-9) as a benchmark can screen for clinical depression from text messages with high performance. However, van Buchem et al [<xref ref-type="bibr" rid="ref40">40</xref>] noted that the performance increase from fine-tuning is not always significant. They did not find a significant increase in diagnostic accuracy between BERT and fine-tuned RedditBERT in detecting depression from patient messages to caregivers.</p>
          <p>What seemed to work was adding self-evaluation steps to enhance the reliability of the LLM, as shown by Leng et al [<xref ref-type="bibr" rid="ref41">41</xref>], who developed a framework for classifying stages of cognitive impairment from EHRs. Another method to improve LLM performance is altering the prompt (ie, prompt engineering), as shown by Esmi et al [<xref ref-type="bibr" rid="ref42">42</xref>], who provided hints in the prompt, which enabled GPT-4 to surpass domain-specific models (MentalQLM and Mental-RoBERTa) in detecting stress from social media data.</p>
          <p>Most studies reported that LLMs achieved higher diagnostic accuracies than traditional machine learning (ML) models when applied to the same task. Van Buchem et al [<xref ref-type="bibr" rid="ref40">40</xref>] reported that BERT and RedditBERT outperformed logistic regressors and support vector machines (SVMs) in diagnosing depression from patient messages and were able to qualitatively provide explanations. Ghosh et al [<xref ref-type="bibr" rid="ref43">43</xref>] demonstrated that BERT-based models performed best in diagnosing depression from clinical interviews as compared to random forests, SVMs, convolutional neural networks (CNNs), and long short-term memory (LSTM) networks. Furthermore, Abdullah and Negied found that BERT, RoBERTa, GPT, and GPT-2 outperformed ML models in diagnosing attention deficit hyperactivity disorder, anxiety, bipolar disorder, and depression from Reddit posts in clinical subreddits [<xref ref-type="bibr" rid="ref44">44</xref>]. However, this notion is challenged by Kallstenius et al [<xref ref-type="bibr" rid="ref45">45</xref>], who compared GPT-4o, out-of-the-box and fine-tuned, against traditional ML and NLP methods with advanced feature engineering. They found that traditional ML performed better than both the fine-tuned and the out-of-the-box LLM.</p>
          <p>There have also been efforts to merge LLMs and ML models into hybrid models to improve overall diagnostic performance. For instance, Thomas et al [<xref ref-type="bibr" rid="ref46">46</xref>] developed an LLM-multilayer perceptron (LLM-MLP) model with XLM-RoBERTa-base as an encoder to detect suicide risk from a German crisis helpline dataset, and found that it outperformed word2vector-MLP, a non-LLM hybrid model. Similarly, Bouktif et al [<xref ref-type="bibr" rid="ref47">47</xref>] reported that combining BERT with CNNs and LSTM networks improved the detection of suicidal ideation from Reddit posts. Besides integration with deep learning models, LLMs have been used as building blocks in more complex frameworks to enhance explainability and diagnostic performance. For example, Dalal et al [<xref ref-type="bibr" rid="ref48">48</xref>] developed a BERT-based model infused with PHQ-9 lexicon and reported that it outperformed BERT and mental health-specific BERT models in diagnosing depression from Reddit posts. Palominos et al [<xref ref-type="bibr" rid="ref49">49</xref>] used small LLMs such as BERT and sentence Transformers to make sentence embeddings that are then used to make a single composite index that can reliably classify schizophrenia spectrum disorders and track their symptoms over time.</p>
          <p>Several studies have also leveraged LLMs to extract input features for ML diagnostic models. Sadeghi et al [<xref ref-type="bibr" rid="ref50">50</xref>] used GPT-3.5 and DepRoBERTa to extract depression severity features from E-DAIC interview transcripts. These features were evaluated on SVMs and were able to detect depression with good performance. Bartal et al [<xref ref-type="bibr" rid="ref51">51</xref>] used text-embedding-ada-002 to generate text embeddings from unstructured written text narratives. They evaluated these embeddings on a neural network model to detect childbirth-related posttraumatic stress disorder and found that the model outperformed GPT-3.5’s zero-shot and few-shot classifications. Going beyond English-language datasets, Arslan et al [<xref ref-type="bibr" rid="ref52">52</xref>] demonstrated the versatility of sentence-transformer models by using SBERT on speech samples from Turkish-speaking patients to detect schizophrenia-spectrum disorders. By generating embeddings from interview transcripts and applying traditional ML classifiers, they achieved high diagnostic accuracy, showcasing the adaptability of LLM-based approaches across languages and clinical conditions.</p>
          <p>However, a common challenge remains when applying LLMs in a diagnostic role on unstructured data. Xu et al [<xref ref-type="bibr" rid="ref12">12</xref>] noted that the LLM struggled with processing complex contextual sentences, especially given the nonspecific nature of social media posts, which may limit generalizability across different population groups. Furthermore, Esmi et al [<xref ref-type="bibr" rid="ref42">42</xref>] mentioned that there is no established method to systematically evaluate the soundness of the reasoning of the LLM output when prompted. In the same vein, Abdullah and Negied [<xref ref-type="bibr" rid="ref44">44</xref>] also highlighted that while LLMs outperformed machine learning classifiers on clinical subreddit content, traditional ML classifiers outperformed LLMs when presented with nonclinical subreddit content. This underscores the importance of data context and source in model performance. On a more fundamental level, Ohse et al [<xref ref-type="bibr" rid="ref36">36</xref>] highlighted that open-source LLMs tend to have an older knowledge cutoff compared to their closed-source counterparts, which may affect their clinical use. In the field of mental health, where privacy and data protection are key concerns, this performance gap between open-source and closed-source models may prove to be an obstacle to widespread adoption. Additionally, van Buchem et al [<xref ref-type="bibr" rid="ref40">40</xref>] brought up the presence of bias, which can be seen by the variations in performance across different patient groups, especially with regard to racial and ethnic background.</p>
        </sec>
      </sec>
      <sec>
        <title>Prognosis</title>
        <p>Five studies [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>] covered the use of LLMs in prognosis. These cover scenarios involving the use of AI to predict the likely course and outcome of mental health diseases across a variety of data landscapes, including vignettes, personal data, and social media posts.</p>
        <p>In a prognostic role, while generally LLMs performed well, there are nuances. When answering clinical vignettes about prognosis, Franco D'Souza et al [<xref ref-type="bibr" rid="ref30">30</xref>] reported that GPT-4 achieved high performance, attaining the highest grade for most of the vignettes. This high performance was also reported by Elyoseph et al [<xref ref-type="bibr" rid="ref53">53</xref>] when they used GPT-4, Bard, and Claude to analyze clinical vignettes about long-term outcome of major depressive disorder and found their predictions were comparable to experts; by Elyoseph and Levkovich [<xref ref-type="bibr" rid="ref54">54</xref>], again using GPT-4, Bard, and Claude to analyze clinical vignettes related to long-term outcomes of schizophrenia and also found their predictions to be comparable to experts; and by Lee et al [<xref ref-type="bibr" rid="ref19">19</xref>] using GPT-4 to predict future mental health crisis using telehealth data and showed that GPT-4 performed comparably to expert clinicians. Moreover, GPT-4 can extract relevant risk indicators, which can explain their thought process and build trust for LLM predictions.</p>
        <p>However, there are caveats in their findings. Other than GPT-4, Bard, and Claude, Elyoseph et al [<xref ref-type="bibr" rid="ref53">53</xref>] also evaluated GPT-3.5 and found that its predictions were more pessimistic than experts. Elyoseph and Levkovich [<xref ref-type="bibr" rid="ref54">54</xref>] similarly evaluated GPT-3.5 and found that its predictions were more pessimistic than experts. Lee et al [<xref ref-type="bibr" rid="ref19">19</xref>] observed that GPT-4 has a high number of false positives compared to clinicians. GPT-4 also performed particularly well when clinicians showed high agreement in their assessments, suggesting that GPT-4 is stronger in extracting obvious, widely recognized clinical signs but may perform less reliably on more ambiguous or nuanced cases. This argument is supported by the findings of Abdullah and Negied [<xref ref-type="bibr" rid="ref44">44</xref>], who compared ML and ensemble learning classifiers with 4 out-of-the-box LLMs (BERT, RoBERTa, OpenAI GPT, and GPT-2) in predicting future mental disorders. They found that ML and ensemble learning classifiers outperformed the LLM models.</p>
      </sec>
      <sec>
        <title>Decision Support</title>
        <p>Seven studies [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref55">55</xref>-<xref ref-type="bibr" rid="ref59">59</xref>] covered the use of LLMs in decision support where AI is used to assist in informing decisions about a patient’s care to the clinicians. These applications fall into two main categories: information extraction and summarization, and direct clinical recommendation.</p>
        <p>One approach to information extraction involves automatic identification of relevant and critical information from lengthy exchanges between patient and doctor in real time. Adhikary et al [<xref ref-type="bibr" rid="ref20">20</xref>] evaluated an array of LLMs—BART, T5, GPT-2, GPT-Neo, GPT-J, FLAN T5, Mistral, MentalBart, MentalLlama, Llama 2, and Phi 2—in summarizing mental health counseling sessions, which may enable faster decision-making by providing concise, relevant session highlights. They found that domain-specific LLMs (MentalBart and MentalLlama) outperformed general-purpose models. Similarly, when summarizing psychiatric interviews for symptoms delineation, So et al [<xref ref-type="bibr" rid="ref55">55</xref>] found that domain-specific model GPT-3.5 performed better than general-purpose GPT-4. Even so, with out-of-the-box Flan-T5 model, Mahbub et al [<xref ref-type="bibr" rid="ref56">56</xref>] demonstrated that Flan-T5 outperformed rule-based regular expressions in extracting key information from substance use disorder clinical notes. This can be attributed to LLMs being able to understand nuanced and diverse expressions in the clinical notes. This ability also extends to non-English transcripts, as shown in the study by Liu et al [<xref ref-type="bibr" rid="ref57">57</xref>], which used LLMs to evaluate the motivation and pleasure domain of negative symptoms. LLMs such as Claude 3-Haiku, Gemini 1.0 Pro, and GPT-3.5 Turbo were used to extract key information from Chinese interview transcripts and to provide a severity score and the reasoning behind it, which the clinician can then use to inform their decision.</p>
        <p>For a direct clinical recommendation approach, Taylor et al [<xref ref-type="bibr" rid="ref58">58</xref>] fine-tuned multiple BERT-based language models–TinyBERT, MobileBERT, DistilBERT, BERT–in triaging patients from the National Health Service electronic health record (NHS EHR) dataset with general mental health conditions. They found that these fine-tuned models achieved high accuracy and outperformed the larger language models, such as Llama 2-7B, when computational resources are limited. Similarly, Chen et al [<xref ref-type="bibr" rid="ref23">23</xref>] showed that a lightweight LLM is able to consume medical data from electroencephalograms. With the information obtained, the LLM can generate emotional states of the patient and provide diagnostic and treatment suggestions. Domain-specific model superiority was also reported by Perlis et al [<xref ref-type="bibr" rid="ref59">59</xref>], where their prompt-augmented GPT-4 outperformed the base model in analyzing bipolar disorder clinical vignettes and coming up with its own recommendations; it even outperformed community clinicians.</p>
        <p>However, there are notable concerns. While LLMs reported stellar quantitative performance, Adhikary et al [<xref ref-type="bibr" rid="ref20">20</xref>] also conducted qualitative assessments and consultations with key stakeholders such as the health care professionals. It was agreed that LLMs are not yet reliable enough for clinical deployment, particularly due to issues where LLMs struggle to distinguish clinical details from therapist interpretations and sometimes miss critical nuances such as suicide risk. This insufficiency highlights that some LLMs currently lack an understanding of emotions, nuances, and subtexts, which are important in real-world conversations. This may partly be attributed to older models being used in their study. In contrast, So et al [<xref ref-type="bibr" rid="ref55">55</xref>] with a newer model, GPT-3.5, demonstrated that the LLM can be tuned to a high degree of granularity, allowing it to pick out crucial information, and even pinpoint specific utterances associated with psychiatric symptoms. Another persisting concern that was aired is hallucination. Mahbub et al [<xref ref-type="bibr" rid="ref56">56</xref>] reported LLMs’ tendency to hallucinate when confronting notes with more than one substance use disorder condition with varying severity, as well as their limited context window. Chen et al [<xref ref-type="bibr" rid="ref23">23</xref>] mentioned that larger language models tend to be less precise than smaller language models in their terminology for specific domain contexts.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our scoping review identified several studies that show predominantly positive findings regarding LLM performance relative to traditional ML and deep learning approaches, in some cases even exceeding that of mental health professionals. However, these findings should be interpreted cautiously. Publication bias likely skews the literature toward positive results, as studies suggesting otherwise are less likely to be published in today’s AI-centric age.</p>
        <p>Despite this limitation, we do see many interesting implementations and use cases for LLMs that hold potential. In some studies, they used a combination of LLM models, and even combined LLMs with traditional AI and ML models (eg, CNN and LSTM) to achieve even higher performance. This seems to be a powerful approach, and could be explored further, especially with greater technical depth. This was also done to augment the capability of the LLM in processing not only textual data but also other modalities. Most studies (n=37, 90.2%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref51">51</xref>-<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref59">59</xref>] focused only on textual content, but in mental health, we should also consider the importance of sentiments and emotions, which can be picked up in other modalities, such as audio and visual. An emotionally aware AI may be more well-suited to pick up aberrations in behaviors, which also enhances mental health detection capabilities. 
While the benefit of domain-specific fine-tuning has previously been reported in other fields [<xref ref-type="bibr" rid="ref60">60</xref>-<xref ref-type="bibr" rid="ref62">62</xref>], only about a third (n=13, 31.7%) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref58">58</xref>] used a fine-tuned model. This may be because model fine-tuning uses significant computational resources, which may not be readily available to those research teams.</p>
        <p>We noted the proliferation of models and use cases, reflecting the inherently complex and heterogeneous nature of both technological and clinical landscapes in mental health AI. However, this diversity also exposes several critical concerns. Notably, there is significant variability in approaches to model fine-tuning and a lack of consistency in the diversity and representation of training data. These disparities may risk perpetuating access inequalities and under-representation, particularly among vulnerable or non-Western populations, which may ultimately impact the generalizability and fairness of these models.</p>
        <p>Moreover, the absence of standardized testing and evaluation frameworks further exacerbates these challenges. Inconsistent methodologies can hinder repeatability and reproducibility, undermining trust in model performance and potentially leading to unintended harm to patients. Inconsistent data (from different sources), language, dataset specificity, and confounding can also affect model performance. This fragmentation highlights the urgent need for the development and adoption of standardized protocols for model tuning, testing, and evaluation—spanning the entire lifecycle from initial development to real-world implementation.</p>
        <p>Current works still rely too much on simple performance metrics. However, LLM research also needs to investigate how to understand and evaluate LLM explanations, ensuring they are acceptable to clinicians and patients, supporting their potential for real-world clinical decision support. The development of explainable models that can act as a domain-level contrast to LLMs will be useful and provide a new level of much-needed scalability, as it is impractical to keep relying on health care professionals to test-gauge LLM and other AI models, taking away time that was supposed to be saved (in theory) by AI.</p>
        <p>Placing patients at the center of this technological transformation requires not only technical rigor but also a commitment to equity, transparency, and safety. Establishing robust, standardized frameworks is essential to ensure that AI-driven tools in mental health are both effective and ethically deployed, ultimately safeguarding patient well-being and promoting broader access to high-quality mental health care. We have not seen research that actively involved patient advocacy or incorporated patient perspectives. Moving beyond the technical realm and actively engaging patients as the central stakeholders is critical and may even accelerate AI adoption in mental health care institutions. However, this requires institutional and even social mindset changes, which is a big topic unto itself.</p>
        <p>We also noted a greater preference for using closed-source LLMs (eg, OpenAI’s GPT) in current studies, possibly due to their superior performance and exposure to larger and more recent datasets as compared to open-source LLMs. However, closed-source LLMs pose an inherent risk of data leakage, which can jeopardize patient privacy and confidentiality. To address data privacy challenges and increase receptivity to LLM deployment in the clinical community, future studies should also consider and evaluate open-source LLMs.</p>
        <p>Thus, future developments must emphasize multimodal synergies (between LLMs and deep AI and ML), standardize development and testing, enhance explainability, and conduct deeper investigations into implementation and deployment practices that engage patients, centering on their well-being.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this scoping review, LLMs have demonstrated considerable potential to transform aspects of mental health care. However, current implementations remain predominantly experimental and qualify as preliminary proof-of-concept studies. While many studies reported superior performance of LLMs, these studies are characterized by risk of publication bias as well as heterogeneous study designs. Critically, only a minority of studies (n=13, 31.7%) [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref55">55</xref>-<xref ref-type="bibr" rid="ref58">58</xref>] validated LLM performance against clinician assessments using real patient data, with the majority relying on proxy outcomes such as clinical vignettes, exam questions, or social media posts. This validation gap potentially overestimates clinical use and limits generalizability to real-world practice. Currently, model tuning is the predominant method for training LLMs toward specific tasks, but this needs proper standardization and guidelines to ensure repeatability and reproducibility. Moreover, common frameworks for model evaluation to ensure safety and efficacy must precede implementation. In future works, prioritizing patient well-being as the paramount principle must remain throughout technology development, testing, and operational deployment.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Specific search strategies and the full list of search terms.</p>
        <media xlink:href="mental_v13i1e88057_app1.docx" xlink:title="DOCX File , 25 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>PRISMA-ScR checklist.</p>
        <media xlink:href="mental_v13i1e88057_app2.docx" xlink:title="DOCX File , 85 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">DSM-5</term>
          <def>
            <p>Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">OCD</term>
          <def>
            <p>obsessive-compulsive disorder</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">PHQ-9</term>
          <def>
            <p>Patient Health Questionnaire-9</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <notes>
      <title>Funding</title>
      <p>This research was supported by the National Medical Research Council (NMRC), Singapore, under its Population Health Research Grant (PHRG) scheme, Project PHRGOC24jul-0026. This research was also supported by the Ministry of Education, Singapore, under its Academic Research Fund Tier 1 (RS08/21 and RT11/21).</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>MCL performed the literature search, analyzed data, and contributed toward content and figure development. JJK co-wrote the methods and results sections. PS provided keen insights on methodological rigor and research positioning while also acting as an independent process and protocol evaluator, and co-wrote the manuscript. WWBG conceptualized, organized content, synthesized findings, supervised, and wrote the manuscript. All authors reviewed and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Busch</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffmann</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rueger</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>van Dijk</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Kader</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ortiz-Prado</surname>
              <given-names>E</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Current applications and challenges in large language models for patient care: a systematic review</article-title>
          <source>Commun Med (Lond)</source>
          <year>2025</year>
          <month>01</month>
          <day>21</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>26</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s43856-024-00717-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s43856-024-00717-2</pub-id>
          <pub-id pub-id-type="medline">39838160</pub-id>
          <pub-id pub-id-type="pii">10.1038/s43856-024-00717-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC11751060</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clusmann</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kolbinger</surname>
              <given-names>FR</given-names>
            </name>
            <name name-style="western">
              <surname>Muti</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Carrero</surname>
              <given-names>ZI</given-names>
            </name>
            <name name-style="western">
              <surname>Eckardt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laleh</surname>
              <given-names>NG</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>The future landscape of large language models in medicine</article-title>
          <source>Commun Med (Lond)</source>
          <year>2023</year>
          <month>10</month>
          <day>10</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>141</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s43856-023-00370-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s43856-023-00370-1</pub-id>
          <pub-id pub-id-type="medline">37816837</pub-id>
          <pub-id pub-id-type="pii">10.1038/s43856-023-00370-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC10564921</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pomerantz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cole</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>BV</given-names>
            </name>
            <name name-style="western">
              <surname>Weeks</surname>
              <given-names>WB</given-names>
            </name>
          </person-group>
          <article-title>Improving efficiency and access to mental health care: combining integrated care and advanced access</article-title>
          <source>Gen Hosp Psychiatry</source>
          <year>2008</year>
          <volume>30</volume>
          <issue>6</issue>
          <fpage>546</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.1016/j.genhosppsych.2008.09.004</pub-id>
          <pub-id pub-id-type="medline">19061681</pub-id>
          <pub-id pub-id-type="pii">S0163-8343(08)00165-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>National Collaborating Centre for Mental Health (UK)</collab>
          </person-group>
          <source>Common Mental Health Disorders: Identification and Pathways to Care</source>
          <year>2011</year>
          <publisher-loc>Leicester (UK)</publisher-loc>
          <publisher-name>British Psychological Society</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moggia</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lutz</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Brakemeier</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Bickman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Treatment personalization and precision mental health care: where are we and where do we want to go?</article-title>
          <source>Adm Policy Ment Health</source>
          <year>2024</year>
          <month>09</month>
          <volume>51</volume>
          <issue>5</issue>
          <fpage>611</fpage>
          <lpage>616</lpage>
          <pub-id pub-id-type="doi">10.1007/s10488-024-01407-w</pub-id>
          <pub-id pub-id-type="medline">39172281</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10488-024-01407-w</pub-id>
          <pub-id pub-id-type="pmcid">PMC11379769</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosenfeld</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Benrimoh</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mirchi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Langlois-Therrien</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rollins</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Big data analytics and AI in mental healthcare</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <month>03</month>
          <day>12</day>
          <fpage>1</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1903.12071"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/b978-0-12-820203-6.00001-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alegría</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nakash</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>NeMoyer</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Increasing equity in access to mental health care: a critical first step in improving service quality</article-title>
          <source>World Psychiatry</source>
          <year>2018</year>
          <month>03</month>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>43</fpage>
          <lpage>44</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29352534"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/wps.20486</pub-id>
          <pub-id pub-id-type="medline">29352534</pub-id>
          <pub-id pub-id-type="pmcid">PMC5775117</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>SouLLMate: an adaptive LLM-driven system for advanced mental health support and assessment, based on a systematic application survey</article-title>
          <source>arXiv</source>
          <year>2025</year>
          <month>09</month>
          <day>19</day>
          <fpage>1</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2410.11859"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2410.11859</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>ChatCounselor: a large language models for mental health support</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>09</month>
          <day>27</day>
          <fpage>1</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2309.15461"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2309.15461</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Chat, summary and diagnosis: a LLM-enhanced conversational agent for interactive depression detection</article-title>
          <year>2024</year>
          <conf-name>Proceedings of the 4th International Conference on Industrial Automation, Robotics and Control Engineering (IARCE)</conf-name>
          <conf-date>November 15-17, 2024</conf-date>
          <conf-loc>Chengdu, China</conf-loc>
          <publisher-name>IEEE</publisher-name>
          <fpage>343</fpage>
          <lpage>348</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/abstract/document/10936542"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/iarce64300.2024.00070</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kuang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>MentaLLaMA: interpretable mental health analysis on social media with large language models</article-title>
          <year>2024</year>
          <conf-name>Proceedings of the ACM Web Conference</conf-name>
          <conf-date>May 13-17, 2024</conf-date>
          <conf-loc>Singapore</conf-loc>
          <fpage>4489</fpage>
          <lpage>4500</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/3589334.3648137"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3589334.3648137</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gabriel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hendler</surname>
              <given-names>J</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Mental-LLM: leveraging large language models for mental health prediction via online text data</article-title>
          <source>Proc ACM Interact Mob Wearable Ubiquitous Technol</source>
          <year>2024</year>
          <month>03</month>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1145/3643540</pub-id>
          <pub-id pub-id-type="medline">39925940</pub-id>
          <pub-id pub-id-type="pii">31</pub-id>
          <pub-id pub-id-type="pmcid">PMC11806945</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ansari</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tiwari</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cambria</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>MentalBERT: publicly available pretrained language models for mental healthcare</article-title>
          <year>2022</year>
          <conf-name>Proceedings of the Thirteenth Language Resources and Evaluation Conference</conf-name>
          <conf-date>June 20-25, 2022</conf-date>
          <conf-loc>Marseille, France</conf-loc>
          <publisher-loc>Marseille, France</publisher-loc>
          <publisher-name>European Language Resources Association</publisher-name>
          <fpage>7184</fpage>
          <lpage>7190</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2022.lrec-1.778/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>YK</given-names>
            </name>
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bae</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Enhancing empathic reasoning of large language models based on psychotherapy models for AI-assisted social support</article-title>
          <source>Korean Journal of Cognitive Science</source>
          <year>2024</year>
          <volume>35</volume>
          <issue>1</issue>
          <fpage>23</fpage>
          <lpage>48</lpage>
          <pub-id pub-id-type="doi">10.19066/cogsci.2024.35.1.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Longpre</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zoph</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tay</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fedus</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Scaling instruction-finetuned language models</article-title>
          <source>arXiv</source>
          <year>2022</year>
          <month>10</month>
          <day>20</day>
          <fpage>1</fpage>
          <lpage>54</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2210.11416"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2210.11416</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Logeswaran</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Exploring the benefits of training expert language models over instruction tuning</article-title>
          <year>2023</year>
          <conf-name>Proceedings of the 40th International Conference on Machine Learning</conf-name>
          <conf-date>July 23-29, 2023</conf-date>
          <conf-loc>Honolulu, Hawaii, USA</conf-loc>
          <publisher-loc>Honolulu, Hawaii, USA</publisher-loc>
          <publisher-name>JMLR.org</publisher-name>
          <fpage>14702</fpage>
          <lpage>14729</lpage>
          <pub-id pub-id-type="doi">10.5555/3618408.3619008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levkovich</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Elyoseph</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Identifying depression and its determinants upon initiating treatment: ChatGPT versus primary care physicians</article-title>
          <source>Fam Med Community Health</source>
          <year>2023</year>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>e002391</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://fmch.bmj.com/lookup/pmidlookup?view=long&#38;pmid=37844967"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/fmch-2023-002391</pub-id>
          <pub-id pub-id-type="medline">37844967</pub-id>
          <pub-id pub-id-type="pii">fmch-2023-002391</pub-id>
          <pub-id pub-id-type="pmcid">PMC10582915</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Na</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>CBT-LLM: a Chinese large language model for cognitive behavioral therapy-based mental health question answering</article-title>
          <source>arXiv</source>
          <year>2024</year>
          <month>03</month>
          <day>24</day>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2403.16008"/>
          </comment>
          <pub-id pub-id-type="doi">10.63317/3bgf5o6ze4k6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mohebbi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>O'Callaghan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Winsberg</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Large language models versus expert clinicians in crisis prediction among telemental health patients: comparative study</article-title>
          <source>JMIR Ment Health</source>
          <year>2024</year>
          <month>08</month>
          <day>02</day>
          <volume>11</volume>
          <fpage>e58129</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2024/1/e58129/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/58129</pub-id>
          <pub-id pub-id-type="medline">38876484</pub-id>
          <pub-id pub-id-type="pii">v11i1e58129</pub-id>
          <pub-id pub-id-type="pmcid">PMC11329850</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adhikary</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Srivastava</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Manuja</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gopinath</surname>
              <given-names>JK</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Exploring the efficacy of large language models in summarizing mental health counseling sessions: benchmark study</article-title>
          <source>JMIR Ment Health</source>
          <year>2024</year>
          <month>07</month>
          <day>23</day>
          <volume>11</volume>
          <fpage>e57306</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2024/1/e57306/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/57306</pub-id>
          <pub-id pub-id-type="medline">39042893</pub-id>
          <pub-id pub-id-type="pii">v11i1e57306</pub-id>
          <pub-id pub-id-type="pmcid">PMC11303879</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Balli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Doğan</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Eser</surname>
              <given-names>HY</given-names>
            </name>
          </person-group>
          <article-title>Improving psychiatry services with artificial intelligence: opportunities and challenges</article-title>
          <source>Turk Psikiyatri Derg</source>
          <year>2024</year>
          <volume>35</volume>
          <issue>4</issue>
          <fpage>317</fpage>
          <lpage>328</lpage>
          <pub-id pub-id-type="doi">10.5080/u27604</pub-id>
          <pub-id pub-id-type="medline">39783807</pub-id>
          <pub-id pub-id-type="pmcid">PMC11681275</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Hines-Martin</surname>
              <given-names>VP</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>LK</given-names>
            </name>
            <name name-style="western">
              <surname>York</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>CF</given-names>
            </name>
            <name name-style="western">
              <surname>Yearwood</surname>
              <given-names>EL</given-names>
            </name>
          </person-group>
          <article-title>Addressing gaps in mental health needs of diverse, at-risk, underserved, and disenfranchised populations: a call for nursing action</article-title>
          <source>Arch Psychiatr Nurs</source>
          <year>2015</year>
          <month>03</month>
          <volume>29</volume>
          <issue>1</issue>
          <fpage>14</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.1016/j.apnu.2014.09.004</pub-id>
          <pub-id pub-id-type="medline">25634869</pub-id>
          <pub-id pub-id-type="pii">S0883-9417(14)00139-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Y</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>EEG emotion copilot: optimizing lightweight LLMs for emotional EEG interpretation with assisted medical record generation</article-title>
          <source>Neural Netw</source>
          <year>2025</year>
          <month>12</month>
          <volume>192</volume>
          <fpage>107848</fpage>
          <pub-id pub-id-type="doi">10.1016/j.neunet.2025.107848</pub-id>
          <pub-id pub-id-type="medline">40683189</pub-id>
          <pub-id pub-id-type="pii">S0893-6080(25)00728-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schubert</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Wick</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Venkataramani</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Performance of large language models on a neurology board-style examination</article-title>
          <source>JAMA Netw Open</source>
          <year>2023</year>
          <month>12</month>
          <day>01</day>
          <volume>6</volume>
          <issue>12</issue>
          <fpage>e2346721</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38060223"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.46721</pub-id>
          <pub-id pub-id-type="medline">38060223</pub-id>
          <pub-id pub-id-type="pii">2812620</pub-id>
          <pub-id pub-id-type="pmcid">PMC10704278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rojas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rojas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Burgess</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Toro-Pérez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salehi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Exploring the performance of ChatGPT versions 3.5, 4, and 4 with vision in the Chilean medical licensing examination: observational study</article-title>
          <source>JMIR Med Educ</source>
          <year>2024</year>
          <month>04</month>
          <day>29</day>
          <volume>10</volume>
          <fpage>e55048</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2024/1/e55048/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/55048</pub-id>
          <pub-id pub-id-type="medline">38686550</pub-id>
          <pub-id pub-id-type="pii">v10i1e55048</pub-id>
          <pub-id pub-id-type="pmcid">PMC11082432</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kao</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>YM</given-names>
            </name>
            <name name-style="western">
              <surname>Yeh</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>CS</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Comparing the performance of ChatGPT GPT-4, Bard, and Llama-2 in the Taiwan psychiatric licensing examination and in differential diagnosis with multi-center psychiatrists</article-title>
          <source>Psychiatry Clin Neurosci</source>
          <year>2024</year>
          <month>06</month>
          <volume>78</volume>
          <issue>6</issue>
          <fpage>347</fpage>
          <lpage>352</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.1111/pcn.13656"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/pcn.13656</pub-id>
          <pub-id pub-id-type="medline">38404249</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>HS</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Performance evaluation of large language models on Korean medical licensing examination: a three-year comparative analysis</article-title>
          <source>Sci Rep</source>
          <year>2025</year>
          <month>10</month>
          <day>15</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>36082</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-025-20066-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-025-20066-x</pub-id>
          <pub-id pub-id-type="medline">41094133</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-025-20066-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC12528361</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Watari</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Takagi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sakaguchi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nishizaki</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shimizu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yamamoto</surname>
              <given-names>Y</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Performance comparison of ChatGPT-4 and Japanese medical residents in the general medicine in-training examination: comparison study</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <month>12</month>
          <day>06</day>
          <volume>9</volume>
          <fpage>e52202</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023/1/e52202/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/52202</pub-id>
          <pub-id pub-id-type="medline">38055323</pub-id>
          <pub-id pub-id-type="pii">v9i1e52202</pub-id>
          <pub-id pub-id-type="pmcid">PMC10733815</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Herrmann-Werner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Festl-Wietek</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Holderried</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Herschbach</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Griewatz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Masters</surname>
              <given-names>K</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Assessing ChatGPT's mastery of Bloom's taxonomy using psychosomatic medicine exam questions: mixed-methods study</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <month>01</month>
          <day>23</day>
          <volume>26</volume>
          <fpage>e52113</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e52113/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/52113</pub-id>
          <pub-id pub-id-type="medline">38261378</pub-id>
          <pub-id pub-id-type="pii">v26i1e52113</pub-id>
          <pub-id pub-id-type="pmcid">PMC10848129</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Franco D'Souza</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Amanullah</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mathew</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Surapaneni</surname>
              <given-names>KM</given-names>
            </name>
          </person-group>
          <article-title>Appraising the performance of ChatGPT in psychiatry using 100 clinical case vignettes</article-title>
          <source>Asian J Psychiatr</source>
          <year>2023</year>
          <month>11</month>
          <volume>89</volume>
          <fpage>103770</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ajp.2023.103770</pub-id>
          <pub-id pub-id-type="medline">37812998</pub-id>
          <pub-id pub-id-type="pii">S1876-2018(23)00326-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gargari</surname>
              <given-names>OK</given-names>
            </name>
            <name name-style="western">
              <surname>Fatehi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mohammadi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Firouzabadi</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Shafiee</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Habibi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Diagnostic accuracy of large language models in psychiatry</article-title>
          <source>Asian J Psychiatr</source>
          <year>2024</year>
          <month>10</month>
          <volume>100</volume>
          <fpage>104168</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ajp.2024.104168</pub-id>
          <pub-id pub-id-type="medline">39111087</pub-id>
          <pub-id pub-id-type="pii">S1876-2018(24)00261-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levkovich</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Elyoseph</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Suicide risk assessments through the eyes of ChatGPT-3.5 versus ChatGPT-4: vignette study</article-title>
          <source>JMIR Ment Health</source>
          <year>2023</year>
          <month>09</month>
          <day>20</day>
          <volume>10</volume>
          <fpage>e51232</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2023/1/e51232/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/51232</pub-id>
          <pub-id pub-id-type="medline">37728984</pub-id>
          <pub-id pub-id-type="pii">v10i1e51232</pub-id>
          <pub-id pub-id-type="pmcid">PMC10551796</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Leonte</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Torous</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Linos</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pinto</surname>
              <given-names>A</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Large language models outperform mental and medical health care professionals in identifying obsessive-compulsive disorder</article-title>
          <source>NPJ Digit Med</source>
          <year>2024</year>
          <month>07</month>
          <day>19</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>193</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-024-01181-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-024-01181-x</pub-id>
          <pub-id pub-id-type="medline">39030292</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-024-01181-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC11271579</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wislocki</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Sami</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liberzon</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zalta</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Comparing generative artificial intelligence and mental health professionals for clinical decision-making with trauma-exposed populations: vignette-based experimental study</article-title>
          <source>JMIR Ment Health</source>
          <year>2025</year>
          <month>10</month>
          <day>14</day>
          <volume>12</volume>
          <fpage>e80801</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2025/1/e80801/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/80801</pub-id>
          <pub-id pub-id-type="medline">41086458</pub-id>
          <pub-id pub-id-type="pii">v12i1e80801</pub-id>
          <pub-id pub-id-type="pmcid">PMC12527320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>YK</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Fick</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Shulman</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shrestha</surname>
              <given-names>P</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Optimizing ChatGPT's interpretation and reporting of delirium assessment outcomes: exploratory study</article-title>
          <source>JMIR Form Res</source>
          <year>2024</year>
          <month>10</month>
          <day>01</day>
          <volume>8</volume>
          <fpage>e51383</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2024/1/e51383/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/51383</pub-id>
          <pub-id pub-id-type="medline">39353189</pub-id>
          <pub-id pub-id-type="pii">v8i1e51383</pub-id>
          <pub-id pub-id-type="pmcid">PMC11480687</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ohse</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hadžić</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mohammed</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Peperkorn</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Danner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yorita</surname>
              <given-names>A</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Zero-shot strike: testing the generalisation capabilities of out-of-the-box LLM models for depression detection</article-title>
          <source>Computer Speech &amp; Language</source>
          <year>2024</year>
          <month>11</month>
          <volume>88</volume>
          <fpage>101663</fpage>
          <pub-id pub-id-type="doi">10.1016/j.csl.2024.101663</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Using large language models to detect depression from user-generated diary text data as a novel approach in digital mental health screening: instrument validation study</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <month>09</month>
          <day>18</day>
          <volume>26</volume>
          <fpage>e54617</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e54617/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/54617</pub-id>
          <pub-id pub-id-type="medline">39292502</pub-id>
          <pub-id pub-id-type="pii">v26i1e54617</pub-id>
          <pub-id pub-id-type="pmcid">PMC11447422</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>PZ</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>E</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>MentalQLM: a lightweight large language model for mental healthcare based on instruction tuning and dual LoRA modules</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2025</year>
          <month>08</month>
          <day>01</day>
          <volume>PP</volume>
          <fpage>1</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2025.3594133</pub-id>
          <pub-id pub-id-type="medline">40748801</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Hwang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>M</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>BERT and BERTopic for screening clinical depression on open-ended text messages collected through a mobile application from older adults</article-title>
          <source>BMC Public Health</source>
          <year>2025</year>
          <month>06</month>
          <day>10</day>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>2161</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-025-23337-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-025-23337-4</pub-id>
          <pub-id pub-id-type="medline">40495126</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-025-23337-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC12150497</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Buchem</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>de Hond</surname>
              <given-names>AAH</given-names>
            </name>
            <name name-style="western">
              <surname>Fanconi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Schuessler</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kant</surname>
              <given-names>I</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Applying natural language processing to patient messages to identify depression concerns in cancer patients</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2024</year>
          <month>10</month>
          <day>01</day>
          <volume>31</volume>
          <issue>10</issue>
          <fpage>2255</fpage>
          <lpage>2262</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocae188</pub-id>
          <pub-id pub-id-type="medline">39018490</pub-id>
          <pub-id pub-id-type="pii">7715991</pub-id>
          <pub-id pub-id-type="pmcid">PMC11413442</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Amini</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Magdamo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Paschalidis</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Mukerji</surname>
              <given-names>SS</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>A GPT-4o-powered framework for identifying cognitive impairment stages in electronic health records</article-title>
          <source>NPJ Digit Med</source>
          <year>2025</year>
          <month>07</month>
          <day>03</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>401</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-025-01834-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-025-01834-5</pub-id>
          <pub-id pub-id-type="medline">40610683</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-025-01834-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC12229571</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Esmi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shahbahrami</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nabati</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rezaei</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gaydadjiev</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>de Jonge</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Stress detection through prompt engineering with a general-purpose LLM</article-title>
          <source>Acta Psychol (Amst)</source>
          <year>2025</year>
          <month>10</month>
          <volume>260</volume>
          <fpage>105462</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0001-6918(25)00775-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.actpsy.2025.105462</pub-id>
          <pub-id pub-id-type="medline">40882316</pub-id>
          <pub-id pub-id-type="pii">S0001-6918(25)00775-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghosh</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Karande</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gite</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Psychological disorder detection: a multimodal approach using a transformer-based hybrid model</article-title>
          <source>MethodsX</source>
          <year>2024</year>
          <month>12</month>
          <volume>13</volume>
          <fpage>102976</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2215-0161(24)00427-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.mex.2024.102976</pub-id>
          <pub-id pub-id-type="medline">39430783</pub-id>
          <pub-id pub-id-type="pii">S2215-0161(24)00427-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC11490908</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdullah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Negied</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Detection and prediction of future mental disorder from social media data using machine learning, ensemble learning, and large language models</article-title>
          <source>IEEE Access</source>
          <year>2024</year>
          <volume>12</volume>
          <fpage>120553</fpage>
          <lpage>120569</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2024.3406469</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kallstenius</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Capusan</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Andersson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Comparing traditional natural language processing and large language models for mental health status classification: a multi-model evaluation</article-title>
          <source>Sci Rep</source>
          <year>2025</year>
          <month>07</month>
          <day>06</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>24102</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-025-08031-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-025-08031-0</pub-id>
          <pub-id pub-id-type="medline">40619512</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-025-08031-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC12230148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lucht</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Segler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wundrack</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Miché</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lieb</surname>
              <given-names>R</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>An explainable artificial intelligence text classifier for suicidality prediction in youth crisis text line users: development and validation study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2025</year>
          <month>01</month>
          <day>29</day>
          <volume>11</volume>
          <fpage>e63809</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2025/1/e63809/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/63809</pub-id>
          <pub-id pub-id-type="medline">39879608</pub-id>
          <pub-id pub-id-type="pii">v11i1e63809</pub-id>
          <pub-id pub-id-type="pmcid">PMC11822322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bouktif</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khanday</surname>
              <given-names>AMUD</given-names>
            </name>
            <name name-style="western">
              <surname>Ouni</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Explainable predictive model for suicidal ideation during COVID-19: social media discourse study</article-title>
          <source>J Med Internet Res</source>
          <year>2025</year>
          <month>01</month>
          <day>17</day>
          <volume>27</volume>
          <fpage>e65434</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e65434/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/65434</pub-id>
          <pub-id pub-id-type="medline">39823631</pub-id>
          <pub-id pub-id-type="pii">v27i1e65434</pub-id>
          <pub-id pub-id-type="pmcid">PMC11786132</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dalal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tilwani</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gaur</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shalin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Sheth</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A cross attention approach to diagnostic explainability using clinical practice guidelines for depression</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2024</year>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.36227/techrxiv.170723261.14729994/v1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Palominos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kirdun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nikzad</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Spilka</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Homan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sommer</surname>
              <given-names>IE</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>A single composite index of semantic behavior tracks symptoms of psychosis over time</article-title>
          <source>Schizophr Res</source>
          <year>2025</year>
          <month>05</month>
          <volume>279</volume>
          <fpage>116</fpage>
          <lpage>127</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0920-9964(25)00111-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.schres.2025.03.038</pub-id>
          <pub-id pub-id-type="medline">40187184</pub-id>
          <pub-id pub-id-type="pii">S0920-9964(25)00111-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC12288044</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sadeghi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Richer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Egger</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Schindler-Gmelch</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rupp</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Rahimi</surname>
              <given-names>F</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Harnessing multimodal approaches for depression detection using large language models and facial expressions</article-title>
          <source>Npj Ment Health Res</source>
          <year>2024</year>
          <month>12</month>
          <day>23</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>66</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s44184-024-00112-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s44184-024-00112-8</pub-id>
          <pub-id pub-id-type="medline">39715786</pub-id>
          <pub-id pub-id-type="pii">10.1038/s44184-024-00112-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC11666580</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bartal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jagodnik</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dekel</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>AI and narrative embeddings detect PTSD following childbirth via birth stories</article-title>
          <source>Sci Rep</source>
          <year>2024</year>
          <month>04</month>
          <day>11</day>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>8336</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-024-54242-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-024-54242-2</pub-id>
          <pub-id pub-id-type="medline">38605073</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-024-54242-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC11009279</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arslan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kizilay</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Verim</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Demirlek</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dokuyan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Turan</surname>
              <given-names>YE</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Automated linguistic analysis in speech samples of Turkish-speaking patients with schizophrenia-spectrum disorders</article-title>
          <source>Schizophr Res</source>
          <year>2024</year>
          <month>05</month>
          <volume>267</volume>
          <fpage>65</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1016/j.schres.2024.03.014</pub-id>
          <pub-id pub-id-type="medline">38518480</pub-id>
          <pub-id pub-id-type="pii">S0920-9964(24)00114-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elyoseph</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Levkovich</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Shinan-Altman</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Assessing prognosis in depression: comparing perspectives of AI models, mental health professionals and the general public</article-title>
          <source>Fam Med Community Health</source>
          <year>2024</year>
          <month>01</month>
          <day>09</day>
          <volume>12</volume>
          <issue>Suppl 1</issue>
          <fpage>e002583</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://fmch.bmj.com/lookup/pmidlookup?view=long&#38;pmid=38199604"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/fmch-2023-002583</pub-id>
          <pub-id pub-id-type="medline">38199604</pub-id>
          <pub-id pub-id-type="pii">fmch-2023-002583</pub-id>
          <pub-id pub-id-type="pmcid">PMC10806564</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elyoseph</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Levkovich</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Comparing the perspectives of generative AI, mental health experts, and the general public on schizophrenia recovery: case vignette study</article-title>
          <source>JMIR Ment Health</source>
          <year>2024</year>
          <month>03</month>
          <day>18</day>
          <volume>11</volume>
          <fpage>e53043</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2024/1/e53043/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/53043</pub-id>
          <pub-id pub-id-type="medline">38533615</pub-id>
          <pub-id pub-id-type="pii">v11i1e53043</pub-id>
          <pub-id pub-id-type="pmcid">PMC11004608</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>So</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Na</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>J</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Aligning large language models for enhancing psychiatric interviews through symptom delineation and summarization: pilot study</article-title>
          <source>JMIR Form Res</source>
          <year>2024</year>
          <month>10</month>
          <day>24</day>
          <volume>8</volume>
          <fpage>e58418</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2024/1/e58418/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/58418</pub-id>
          <pub-id pub-id-type="medline">39447159</pub-id>
          <pub-id pub-id-type="pii">v8i1e58418</pub-id>
          <pub-id pub-id-type="pmcid">PMC11544339</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mahbub</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dams</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Srinivasan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rizy</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Danciu</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Trafton</surname>
              <given-names>J</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Decoding substance use disorder severity from clinical notes using a large language model</article-title>
          <source>Npj Ment Health Res</source>
          <year>2025</year>
          <month>03</month>
          <day>07</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s44184-024-00114-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s44184-024-00114-6</pub-id>
          <pub-id pub-id-type="medline">39915681</pub-id>
          <pub-id pub-id-type="pii">10.1038/s44184-024-00114-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC11802718</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>MY</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>YA</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Analyzing generative AI and machine learning in auto-assessing schizophrenia's negative symptoms</article-title>
          <source>Schizophr Bull</source>
          <year>2025</year>
          <month>08</month>
          <day>13</day>
          <fpage>sbaf102</fpage>
          <pub-id pub-id-type="doi">10.1093/schbul/sbaf102</pub-id>
          <pub-id pub-id-type="medline">40801756</pub-id>
          <pub-id pub-id-type="pii">8233145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ghose</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Rohanian</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Nouriborji</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kormilitzin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Clifton</surname>
              <given-names>DA</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Efficiency at scale: investigating the performance of diminutive language models in clinical tasks</article-title>
          <source>Artif Intell Med</source>
          <year>2024</year>
          <month>11</month>
          <volume>157</volume>
          <fpage>103002</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0933-3657(24)00244-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2024.103002</pub-id>
          <pub-id pub-id-type="medline">39471774</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(24)00244-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Perlis</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Goldberg</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Ostacher</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Schneck</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Clinical decision support for bipolar depression using large language models</article-title>
          <source>Neuropsychopharmacology</source>
          <year>2024</year>
          <month>08</month>
          <volume>49</volume>
          <issue>9</issue>
          <fpage>1412</fpage>
          <lpage>1416</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41386-024-01841-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41386-024-01841-2</pub-id>
          <pub-id pub-id-type="medline">38480911</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41386-024-01841-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC11251032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anisuzzaman</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Malins</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Attia</surname>
              <given-names>ZI</given-names>
            </name>
          </person-group>
          <article-title>Fine-tuning large language models for specialized use cases</article-title>
          <source>Mayo Clin Proc Digit Health</source>
          <year>2025</year>
          <month>03</month>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>100184</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2949-7612(24)00114-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.mcpdig.2024.11.005</pub-id>
          <pub-id pub-id-type="medline">40206998</pub-id>
          <pub-id pub-id-type="pii">S2949-7612(24)00114-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC11976015</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Church</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Emerging trends: a gentle introduction to fine-tuning</article-title>
          <source>Nat Lang Eng</source>
          <year>2021</year>
          <month>10</month>
          <day>26</day>
          <volume>27</volume>
          <issue>6</issue>
          <fpage>763</fpage>
          <lpage>778</lpage>
          <pub-id pub-id-type="doi">10.1017/s1351324921000322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>DoctorGPT: a large language model with Chinese medical question-answering capabilities</article-title>
          <year>2023</year>
          <conf-name>Proceedings of the International Conference on High Performance Big Data and Intelligent Systems (HDIS)</conf-name>
          <conf-date>December 06-08, 2023</conf-date>
          <conf-loc>Macau, China</conf-loc>
          <publisher-name>IEEE</publisher-name>
          <fpage>186</fpage>
          <lpage>193</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/abstract/document/10499472"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/hdis60872.2023.10499472</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
