<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Ment Health</journal-id><journal-id journal-id-type="publisher-id">mental</journal-id><journal-id journal-id-type="index">16</journal-id><journal-title>JMIR Mental Health</journal-title><abbrev-journal-title>JMIR Ment Health</abbrev-journal-title><issn pub-type="epub">2368-7959</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">57234</article-id><article-id pub-id-type="doi">10.2196/57234</article-id><title-group><article-title>Using Large Language Models to Understand Suicidality in a Social Media&#x2013;Based Taxonomy of Mental Health Disorders: Linguistic Analysis of Reddit Posts</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Bauer</surname><given-names>Brian</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Norel</surname><given-names>Raquel</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Leow</surname><given-names>Alex</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Rached</surname><given-names>Zad Abi</given-names></name><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wen</surname><given-names>Bo</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Cecchi</surname><given-names>Guillermo</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Psychology, University of Georgia</institution>, <addr-line>Athens</addr-line><addr-line>GA</addr-line>, <country>United States</country></aff><aff id="aff2"><institution>Digital Health, IBM Research</institution>, <addr-line>New York</addr-line><addr-line>NY</addr-line>, <country>United States</country></aff><aff id="aff3"><institution>Department of Psychiatry, University of Illinois Chicago</institution>, <addr-line>Chicago</addr-line><addr-line>IL</addr-line>, <country>United States</country></aff><aff id="aff4"><institution>Department of Biomedical Engineering and Computer Science, University of Illinois Chicago</institution>, <addr-line>Chicago</addr-line><addr-line>IL</addr-line>, <country>United States</country></aff><aff id="aff5"><institution>College Louise Wegmann</institution>, <addr-line>Beirut</addr-line>, <country>Lebanon</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Torous</surname><given-names>John</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Hassan</surname><given-names>Ahmed</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Elbattah</surname><given-names>Mahmoud</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Adebambo</surname><given-names>Temitope</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Brian Bauer, PhD<email>brian.bauer@uga.edu</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date pub-type="epub"><day>16</day><month>5</month><year>2024</year></pub-date><volume>11</volume><elocation-id>e57234</elocation-id><history><date date-type="received"><day>08</day><month>02</month><year>2024</year></date><date date-type="rev-recd"><day>28</day><month>03</month><year>2024</year></date><date date-type="accepted"><day>29</day><month>03</month><year>2024</year></date></history><copyright-statement>&#x00A9; Brian Bauer, Raquel Norel, Alex Leow, Zad Abi Rached, Bo Wen, Guillermo Cecchi. Originally published in JMIR Mental Health (<ext-link ext-link-type="uri" xlink:href="https://mental.jmir.org">https://mental.jmir.org</ext-link>), 16.5.2024. </copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Mental Health, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://mental.jmir.org/">https://mental.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://mental.jmir.org/2024/1/e57234"/><abstract><sec><title>Background</title><p>Rates of suicide have increased by over 35% since 1999. Despite concerted efforts, our ability to predict, explain, or treat suicide risk has not significantly improved over the past 50 years.</p></sec><sec><title>Objective</title><p>The aim of this study was to use large language models to understand natural language use during public web-based discussions (on Reddit) around topics related to suicidality.</p></sec><sec sec-type="methods"><title>Methods</title><p>We used large language model&#x2013;based sentence embedding to extract the latent linguistic dimensions of user postings derived from several mental health&#x2013;related subreddits, with a focus on suicidality. We then applied dimensionality reduction to these sentence embeddings, allowing them to be summarized and visualized in a lower-dimensional Euclidean space for further downstream analyses. We analyzed 2.9 million posts extracted from 30 subreddits, including r/SuicideWatch, between October 1 and December 31, 2022, and the same period in 2010.</p></sec><sec sec-type="results"><title>Results</title><p>Our results showed that, in line with existing theories of suicide, posters in the suicidality community (r/SuicideWatch) predominantly wrote about feelings of disconnection, burdensomeness, hopeless, desperation, resignation, and trauma. Further, we identified distinct latent linguistic dimensions (well-being, seeking support, and severity of distress) among all mental health subreddits, and many of the resulting subreddit clusters were in line with a statistically driven diagnostic classification system&#x2014;namely, the Hierarchical Taxonomy of Psychopathology (HiTOP)&#x2014;by mapping onto the proposed superspectra.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Overall, our findings provide data-driven support for several language-based theories of suicide, as well as dimensional classification systems for mental health disorders. Ultimately, this novel combination of natural language processing techniques can assist researchers in gaining deeper insights about emotions and experiences shared on the web and may aid in the validation and refutation of different mental health theories.</p></sec></abstract><kwd-group><kwd>natural language processing</kwd><kwd>explainable AI</kwd><kwd>suicide</kwd><kwd>mental health disorders</kwd><kwd>mental health disorder</kwd><kwd>mental health</kwd><kwd>social media</kwd><kwd>online discussions</kwd><kwd>online</kwd><kwd>large language model</kwd><kwd>LLM</kwd><kwd>downstream analyses</kwd><kwd>trauma</kwd><kwd>stress</kwd><kwd>depression</kwd><kwd>anxiety</kwd><kwd>AI</kwd><kwd>artificial intelligence</kwd><kwd>explainable artificial intelligence</kwd><kwd>web-based discussions</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Suicide rates have increased by 35% since 1999, and suicide remains a leading cause of death in the United States [<xref ref-type="bibr" rid="ref1">1</xref>]. Despite concerted efforts, our ability to predict, explain, or treat suicide risk has not significantly improved over the past 50 years [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Thus, a top public health priority is understanding factors that contribute to suicide risk. Recent meta-analytic work, comprising suicide risk factor research over the past 50 years (using 365 studies), found that no single set of risk factors (eg, mood disorders or impulsivity) accurately predict future suicidal thoughts and behaviors [<xref ref-type="bibr" rid="ref2">2</xref>]. Past risk factor studies have been limited by (1) potential sampling biases (eg, overrepresentation of clinical populations), (2) structured clinical interviews and surveys, and (3) laboratory-based (rather than naturalistic) settings. With the rapid increase in the use of web-based platforms, such as Reddit, people experiencing mental health symptoms have new outlets for sharing experiences, seeking support, and engaging in discussion regarding their mental health. Platforms such as Reddit provide unique opportunities for studying the experiences and perspectives of individuals at risk of suicide in the context of other mental pathologies and stressors [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. To overcome previous limitations in suicide risk factor research, this study aims to analyze posts from a web-based community dedicated to providing support for individuals in crisis (ie, the r/SuicideWatch subreddit), to involve individuals who may not present for mental health studies or disclose their suicide risk and to obtain more nuanced insights into suicidality from the naturalistic and open-ended nature of anonymous web-based forums.</p><p>Understanding the factors contributing to suicidality is crucial for developing effective prevention strategies and interventions. Prominent theories of suicide&#x2014;such as the Interpersonal Theory of Suicide (ITS) [<xref ref-type="bibr" rid="ref7">7</xref>], Three-Step Theory (3ST) [<xref ref-type="bibr" rid="ref8">8</xref>], and Integrated Motivational&#x2013;Volitional (IMV) model [<xref ref-type="bibr" rid="ref9">9</xref>]&#x2014;are referred to as &#x201C;ideation-to-action&#x201D; frameworks. These theories attempt to explain how people develop suicidal ideation and transition to suicidal behaviors. Several common variables among ideation-to-action frameworks include feeling disconnected and burdensome to others, feelings of entrapment and hopelessness, and factors that may increase peoples&#x2019; capability to die by suicide (eg, traumatic experiences). Similar to many other psychological theories, they have been developed through researchers observing data patterns and testing their hypotheses (mainly) through self-reported survey data. Analyzing linguistic patterns in Reddit posts may provide an avenue for suicide theory exploration, confirmation, and refutation for these ideation-to-action frameworks, which could significantly impact future and existing intervention targets and assessment practices.</p><p>The use of natural language processing (NLP) with machine learning (ML) to gain new insights into mental health topics has increased dramatically over the last decade [<xref ref-type="bibr" rid="ref10">10</xref>]. In mental health, this approach has mainly been used to confirm existing hypotheses through extracting meaning from texts (NLP) and then classifying these extractions (ML); however, this combination approach can be equally useful for exploration and discovery [<xref ref-type="bibr" rid="ref10">10</xref>]. Specifically in suicide research, NLP and ML have primarily been used to help improve the accuracy of suicide risk identification [<xref ref-type="bibr" rid="ref11">11</xref>]. However, NLP combined with ML is less frequently used in both mental health and suicide research to derive theoretical perspectives. Newer large language models (LLMs) that use Bidirectional Encoder Representations from Transformers (BERT) allow researchers to capture more complexities in human language than previous approaches, which are ideal for discovery as well as for testing directional hypotheses. Furthermore, the recent advances in explainable artificial intelligence (XAI) can be applied to NLP to help improve transparency, trustworthiness, and understanding of results in the context of mental health [<xref ref-type="bibr" rid="ref12">12</xref>]. Using these modern techniques in tandem may help provide critical insights into suicide risk. The primary aim of this study is to analyze the content of posts from the r/SuicideWatch subreddit as well as mental health&#x2013;related and non&#x2013;mental health&#x2013;related subreddits, with the goal of contributing to our understanding of suicide risk to ultimately improve prevention strategies and interventions. Although there are several other anonymous platforms available for individuals to discuss suicide, we chose Reddit due to the size of its userbase (over 400,000 members), which may aid the generalizability of our findings, and for practical reasons&#x2014;namely, data availability. For this, we used LLMs to produce numerical representations of these posts&#x2014;called <italic>embeddings</italic> [<xref ref-type="bibr" rid="ref13">13</xref>]&#x2014;which may reveal <italic>unique</italic> suicidality linguistic patterns; we also used recent developments in generative LLMs [<xref ref-type="bibr" rid="ref14">14</xref>] and XAI [<xref ref-type="bibr" rid="ref15">15</xref>], which turn abstract embeddings into natural language text, to identify connections with theories of suicidal behavior.</p><p>Our study was primarily data driven, as we used <italic>generic</italic> LLM embeddings of the posts and only applied theoretical constructs for post hoc interpretation. This means that the numerical representation of the posts was based on how the sentences in them are related to sentences in very large text corpora used for training the LLM that cover vast swaths of topics.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>Reddit users are made aware that their posts are publicly accessible through Reddit&#x2019;s Terms and Conditions. No personal identifying information (eg, names, locations, or IP addresses) were collected. Further, none of the authors participated in any discussions; thus, it was not necessary to inform users that their posts may be used for research. Because the collected data set is publicly available and already deidentified, the University of Georgia Human Subjects Office reviewed the submission and assigned a determination of &#x201C;Not Human Research.&#x201D;</p></sec><sec id="s2-2"><title>Data Procurement, Selection, and Preprocessing</title><p>We downloaded posts from a list of subreddits from October 1 to December 31, 2022, and from October 1 to December 31, 2010, using The-Eye.eu [<xref ref-type="bibr" rid="ref16">16</xref>], which contains an archive of Reddit&#x2019;s full submission until December 2022. We used Python (Python Software Foundation) to process the data. Posts that were removed or deleted were not used. Empty entries or entries containing just a &#x201C;?&#x201D; were not considered. We analyzed 16 subreddits related to mental health and 14 subreddits not related to mental health to serve as controls. In <xref ref-type="table" rid="table1">Table 1</xref>, we compared the number of posts and words per post by 2 subreddit groups (mental health and controls). We saw that the number of posts was similar in both groups, but the range and variability (ie, SDs for the number of words and words per post) were much higher in the mental health subreddits. We noted that for the 2010 data, in general, the posts were much longer than those from 2022 data; in the 2022 data, some of the posts were just a single word (eg, &#x201C;Pls,&#x201D; &#x201C;Yuh,&#x201D; and &#x201C;Help&#x201D;), whereas for older data (2010), the posts were much longer.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Statistical descriptors of the 16 subreddits related to mental health and the 14 subreddits used as controls.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Subreddits and statistics</td><td align="left" valign="bottom" colspan="2">2010 data</td><td align="left" valign="bottom" colspan="2">2022 data</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">Mean (SD)</td><td align="left" valign="bottom">Range</td><td align="left" valign="bottom">Mean (SD)</td><td align="left" valign="bottom">Range</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="6"><bold>Mental health</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Posts per subreddit</td><td align="left" valign="top">246 (225)</td><td align="left" valign="top">14-785</td><td align="left" valign="top">182,000 (240,000)</td><td align="left" valign="top">15,900-928,000</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Words per subreddit</td><td align="left" valign="top">409,000 (415,000)</td><td align="left" valign="top">42,000-1,600,000</td><td align="left" valign="top">34,700,000 (48,800,000)</td><td align="left" valign="top">2,700,000-189,000,000</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Words per post</td><td align="left" valign="top">1700 (446)</td><td align="left" valign="top">1000-3000</td><td align="left" valign="top">183 (31)</td><td align="left" valign="top">112-225</td></tr><tr><td align="left" valign="top" colspan="6"><bold>Controls</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Posts per subreddit</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">195,000 (243,000)</td><td align="left" valign="top">9700-831,000</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Words per subreddit</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">21,900,000 (28,300,000)</td><td align="left" valign="top">1,500,000-95,000,000</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Words per post</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">114 (27)</td><td align="left" valign="top">72-163</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-3"><title>Linguistic Analysis and Interpretation</title><sec id="s2-3-1"><title>Overall Approach</title><p>After obtaining the posts, we followed these steps: (1) represented the posts in the latent space of LLM embeddings; (2) computed a representation for each subreddit, averaging all its posts; (3) computed different metrics of the structure of the subreddits in the embedding space; and (4) applied interpretation techniques to these structural metrics to obtain insights about the relationship between suicidality and other self-identified groups.</p></sec><sec id="s2-3-2"><title>LLM Embeddings</title><p>Semantic text embedding is an NLP technique used to represent the meaning of text in numerical form. It accounts for the context of words or phrases rather than just their individual representations. By using advancements in text embedding, we used a more precise method in NLP using BERT [<xref ref-type="bibr" rid="ref17">17</xref>]. Text embedding assigns a numerical vector to each text, enabling texts with similar contexts to be closer in the vector space. This allows us, using mathematical tools, to better understand and analyze the semantic similarities and differences between different texts. To represent a subreddit, we compute the centroid (average) of the embeddings of all the posts assigned to it. As mentioned above, these embeddings are <italic>unbiased</italic>: we did not use the metadata related to the subreddit provenance nor applied any theory-driven construct [<xref ref-type="bibr" rid="ref18">18</xref>].</p></sec><sec id="s2-3-3"><title>Subreddits Structure</title><p>To obtain a measure of similarity between subreddits, for each post, we computed a &#x201C;linguistic label&#x201D;&#x2014;the label of the nearest centroid in the embedding space. Then, for each subreddit, we computed the percentage of posts that were assigned to any &#x201C;linguistic label,&#x201D; including the original label; the proportion of posts &#x201C;linguistically assigned&#x201D; to a subreddit other than the original one is a measure of the similarity between them.</p></sec><sec id="s2-3-4"><title>Hierarchical Clustering</title><p>Hierarchical clustering operates on the principle of iteratively merging the closest pair of clusters, where the definition of &#x201C;closeness&#x201D; varies according to different linkage criteria. We used the Ward linkage method, where the distance between 2 clusters is the increase in the summed square distance from each point to the centroid of its cluster after merging the clusters. A dendrogram (a tree-like diagram) visually represents the process and results of hierarchical clustering. Each leaf corresponds to a data point, and branches represent the successive merging of clusters, with the height of each merge proportional to the distance between the combined clusters. By examining a dendrogram, one can intuitively grasp the data&#x2019;s structure and decide on an appropriate number of clusters by cutting the tree at a specific height. This visual tool aids in interpreting the complex relationships and nested structures within the data, offering insights into the underlying patterns and groupings. We performed hierarchical clustering on the embedding representative of the subreddit, that is, the average of all the embedding vectors of the subreddit.</p></sec><sec id="s2-3-5"><title>Dimensionality Reduction</title><p>We factorized the subreddits centroids in the embedding space using singular value decomposition (SVD). We analyzed the relative location of the subreddits in the reduced representation of the first 3 SVD components, understood as latent semantic dimensions.</p></sec><sec id="s2-3-6"><title>Generative LLM</title><p>We used a generative LLM to obtain insights into the latent patterns of the SVD components. In a Jupyter Notebook environment, we configured the <italic>Langchain</italic> library to use the GPT-4-0613 model (OpenAI) [<xref ref-type="bibr" rid="ref14">14</xref>] with a zero-temperature setting, ensuring deterministic outputs for consistency in interpretation. We used the <italic>ConversationChain</italic> module coupled with <italic>ConversationBufferMemory</italic> to facilitate an interactive and memory-aware dialogue with GPT-4. This setup enabled us to iteratively query the model with our prompts and data, ensuring a contextually rich and coherent analysis of the subreddit postings. The use of a verbose mode in the conversation setup provided detailed logging of the model&#x2019;s responses, further aiding in the transparency and traceability of our analytical process. We identified posts with the top and bottom 5 embedding positions in the first 3 dimensions of SVD projections, totaling 30 extreme postings.</p></sec><sec id="s2-3-7"><title>XAI Techniques</title><p>XAI techniques are designed to provide insights into the factors or features that contribute to an artificial intelligence system&#x2019;s outputs, allowing users to understand and validate the reasoning behind those decisions. With this in mind, we use ProtoDash [<xref ref-type="bibr" rid="ref15">15</xref>], a technique used to choose representative examples that effectively represent the overall distribution of a data set, to help with the interpretation of the model. We fed the LLM embeddings and the associated posts from the r/SuicideWatch subreddit to ProtoDash and asked for the 5 most representative posts in the data set. Those selected prototypes of the data set were then fed to ChatGPT, to help obtain insight into the data. <xref ref-type="fig" rid="figure1">Figure 1</xref> shows a visual overview of the study methodology.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Schematic description of the processing workflow in this paper. All Reddit postings were first fed into BERT to yield posting-level embeddings in a high-dimensional space, followed by dimensionality reduction (into 3D) using SVD. This procedure allows us to extract prototypical postings (using ProtoDash) as well as extreme postings along each of the 3 SVD axes, which were then fed into ChatGPT for semantic interpretations. Last, we also performed hierarchical clustering to recover the relational structure between different subreddits. BERT: Bidirectional Encoder Representations from Transformers; GPT: Generative Pre-trained Transformer; SVD: singular value decomposition.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mental_v11i1e57234_fig01.png"/></fig></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>Unless explicitly indicated, we presented results on the 2022 data; the 2010 data were used to ascertain the stability of the structural features we determined with the most recent and larger data set.</p><sec id="s3-1"><title>Verbosity in Mental Health Subreddits</title><p>The total number of posts and the verbosity (the average number of words per post in a subreddit) are shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>. R/Depression is the most active of the mental health subreddits, and only behind the r/DnD (referring to Dungeons and Dragons) and r/gaming subreddits overall. The subreddits were sorted by verbosity (green line), which shows higher values for mental health posts as opposed to non&#x2013;mental health posts. Moreover, within mental health subreddits, there are differences of more than 30% between the least verbose subreddits (r/schizophrenia and r/EDAnonymous [eating disorders anonymous]) and the high-verbosity subreddits (r/PTSD [posttraumatic stress disorder] and r/Depression).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>The total number of posts (bar heights) and verbosity (number of words per post; green line) of subreddits. The verbosity of MH subreddits (blue) is significantly higher than non-MH ones (orange). BPD: borderline personality disorder; DnD: Dungeons and Dragons; EDAnonymous: eating disorders anonymous; MH: mental health; PTSD: posttraumatic stress disorder.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mental_v11i1e57234_fig02.png"/></fig></sec><sec id="s3-2"><title>Structure of the Linguistic Embedding Space</title><p>The plot in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> shows the measure of similarity to r/SuicideWatch for all subreddits. Almost half of the posts (234,406/479,321; 48.9%) from r/SuicideWatch were closer to their centroid than to any other centroid, with an additional 12.7% (60,854/479,321) being the closest to r/Depression, followed by r/BPD (borderline personality disorder) and r/SocialAnxiety. The extension of this approach to all subreddits is shown in <xref ref-type="fig" rid="figure3">Figure 3</xref>, which represents inter-subreddit similarities in the width of the links, thresholded at 7%. Besides the strong associations already present in <xref ref-type="fig" rid="figure2">Figure 2</xref> (of r/SuicideWatch with r/Depression and r/BPD), there are associations of r/BPD with r/Depression; r/SocialAnxiety and r/PTSD; and several associations between the different anxiety-related subreddits, r/Psychosis and r/schizophrenia, the 2 bipolar subreddits, and r/addiction with r/alcoholism.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Proportion of posts from a given subreddit that are the &#x201C;closest&#x201D; to a different subreddit centroid. The r/SuicideWatch centroid is colored red, nodes connected to it are colored purple, and the rest of nodes are colored blue. The width of the edge is proportional to the number of posts that are the closest to each centroid. addition: r/addiction; alcoholism: r/alcoholism; anxiety: r/SocialAnxiety; bipolar: r/Bipolar; bipolarR: r/BipolarReddit; BPD: r/BPD (borderline personality disorder); depression: r/Depression; EDA: r/EDAnonymous (eating disorders anonymous); healthanx: r/HealthAnxiety; mhealth: r/MentalHealth; millness: r/mentalillness; psychosis: r/Psychosis; ptsd: r/PTSD (posttraumatic stress disorder); schiz: r/schizophrenia; socialanx: r/SocialAnxiety; suicide: r/SuicideWatch.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mental_v11i1e57234_fig03.png"/></fig><p>The dendrogram in <xref ref-type="fig" rid="figure4">Figure 4</xref> represents the result of the hierarchical clustering of the centroid coordinates for all the mental health subreddits in 2022 and 2010. The horizontal axis representing the linkage distance illustrates and supports the sequential merging of clusters. Clusters that merge at lower distances are more akin; as the distance increases, the clusters amalgamate into broader categories. The cluster with the shortest distance is grouping the subreddits r/BipolarReddit and r/Bipolar. The following cluster consists of the subreddits that discuss mental illness (r/mentalillness) and mental health (r/MentalHealth). The third most similar cluster is formed by r/Psychosis and r/schizophrenia. R/Anxiety and r/HealthAnxiety cluster together (green), as do the subreddits regarding addiction (r/alcoholism and r/addiction; red). Importantly, the clustering of the 2022 and 2010 subreddits were highly consistent, supporting the notion that the posts by the different subreddits revolve around the same topics related to the mental health condition they identify with.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Dendrogram of mental health vectors, using Ward linkage of subreddit&#x2019;s embeddings. The horizontal axis represents the linkage distance; clusters that combine at shorter distances are more similar to each other; as the distance grows, these clusters join together to form larger, more general categories. BPD: borderline personality disorder; EDAnonymous: eating disorders anonymous; PTSD: posttraumatic stress disorder.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mental_v11i1e57234_fig04.png"/></fig></sec><sec id="s3-3"><title>Interpretation of the Linguistic Embedding Space</title><p>To obtain insights into the meaning of these topics, we performed a SVD factorization of the embedding space and used interpretation techniques on the resulting factors. <xref ref-type="fig" rid="figure5">Figure 5</xref> shows the relative location of all the subreddits in the space determined by the first 2 SVD components. With the exception of r/Mindfulness, which is close to the mental health subreddits, there is a clear separation of classes along the SVD1 dimension. We also observed that r/SuicideWatch was ranked the second highest in SVD2, suggesting that this dimension may contain patterns relevant to suicidality.</p><p>The result of this procedure is presented in <xref ref-type="table" rid="table2">Table 2</xref>, which can be summarized by the following labels and directionalities: SVD1, <italic>Emotional Well-Being: Despair to Resilience</italic>; SVD2, <italic>Seeking Understanding and Support: Closing In to Reaching Out</italic>; and SVD3, <italic>Severity of Distress: Low to High</italic>. Using these axis interpretations, we mapped the results from the second and third SVD projection only for the mental health subreddits, for better visualization and interpretation; <xref ref-type="fig" rid="figure6">Figure 6</xref> shows only mental health cases, which fall within the left portion of <xref ref-type="fig" rid="figure5">Figure 5</xref>. A prominent feature of the 2 plots is that r/SuicideWatch mapped onto the high end of the <italic>Understanding and Support (Reaching Out)</italic> dimension, as well as on the high end of the <italic>Severity of Distress (High Distress)</italic> dimension in a completely data-driven way.</p><list list-type="bullet"><list-item><p>Axis label prompt: &#x201C;Identify the name of the axis spanned by these two groups of extreme postings given that if one extreme is hot, the other extreme is cold, then the axis is temperature.&#x201D;</p></list-item><list-item><p>High values prompt: &#x201C;What are the similarities among the top (highest values on the axis) five postings.&#x201D;</p></list-item><list-item><p>Low values prompt: &#x201C;What are the similarities among the bottom (lowest values on the axis) five postings.&#x201D;</p></list-item></list><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>SVD factorization of the embedding space. With the exception of r/Mindfulness, all non-mental health subreddits have positive value on the first SVD component, whereas all mental health subreddits have negative value on the first SVD component. SVD: singular value decomposition.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mental_v11i1e57234_fig05.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Interpretation based on GPT-4 for prompts for the main 3 singular value decomposition (SVD) axes. Text from GPT-4 has been edited for grammar and clarity.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Values</td><td align="left" valign="bottom" colspan="3">Interpretation and axis label</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">SVD1, Emotional Well-Being: Despair to Resilience</td><td align="left" valign="bottom">SVD2, Seeking Understanding and Support: Closing In to Reaching Out</td><td align="left" valign="bottom">SVD3, Severity of Distress: Low to High</td></tr></thead><tbody><tr><td align="left" valign="top">High</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Struggle and resilience</p></list-item><list-item><p>Experiencing significant anxiety, depression, and life changes</p></list-item><list-item><p>Learning to trust themselves and their abilities to handle their situations</p></list-item><list-item><p>Actively seeking help and trying to find ways to manage their mental health (eg, therapy, self-care, and relaxation techniques)</p></list-item><list-item><p>Making progress toward self improvement and happiness</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Seeking guidance or advice</p></list-item><list-item><p>Desire for information, advice, or validation</p></list-item><list-item><p>Reaching out for insights, recommendations, or shared experiences</p></list-item><list-item><p>This emotion is intertwined with feelings of uncertainty, curiosity, and a desire for understanding or improvement</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>More intense and distressing content</p></list-item><list-item><p>Severe mental health struggles, including suicidal thoughts and feelings of extreme despair</p></list-item><list-item><p>Darker tone and more desperate</p></list-item></list></td></tr><tr><td align="left" valign="top">Low</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Narrative of struggle and despair</p></list-item><list-item><p>Feelings of sadness and emptiness</p></list-item><list-item><p>Express a sense of hopelessness about their situation</p></list-item><list-item><p>Previously sought help (eg, therapy and medication) but feel that these methods have been ineffective</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Feelings of internal conflict, frustration, and being overwhelmed by personal challenges</p></list-item><list-item><p>Deep sense of pain stemming from mental health struggles, physical health issues, or personal insecurities</p></list-item><list-item><p>There&#x2019;s a recurring theme of individuals grappling with their emotions and seeking understanding, validation, or support</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Moderate distress and struggles with mental health</p></list-item><list-item><p>Discusses personal experiences with mental health struggles, seeking help, and dealing with anxiety and social situations</p></list-item></list></td></tr></tbody></table></table-wrap><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>SVD factorization of the embedding space for mental health subreddits. Centroids (representing each subreddit) are sorted from lowest to highest value on the second SVD factor in the top panel and on the third SVD factor in the bottom panel. On both panels, that is, for the second and third SVD components, the 4 extreme values correspond to r/SuicideWatch, r/SocialAnxiety, r/Depression, and r/BPD. BPD: borderline personality disorder; EDAnonymous: eating disorders anonymous; PTSD: posttraumatic stress disorder; SVD: singular value decomposition.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mental_v11i1e57234_fig06.png"/></fig><p>To complement our previous analyses that relied on extreme postings, here, we instead leveraged prototypical or representative postings within the r/SuicideWatch subreddit. To this end, we used ProtoDash to select the top 3 most representative postings, which were then fed into GPT-4 with the prompt of &#x201C;Are these postings in line with current theories about suicide and suicidal ideation?&#x201D; The following summarizes GPT-4&#x2019;s response:</p><disp-quote><p>The first post describes feelings of hopelessness, despair, and anxiety, with a clear intent to commit suicide. The second post, despite stating a lack of suicidal intent, expresses chronic emotional distress, a lack of enjoyment in life, and recurring thoughts of wanting to die, which may actually be signs of suicidal ideation. The third represents a state of severe emotional distress, feelings of hopelessness, despair, loneliness, and a sense of being misunderstood and neglected; it also mentions previous suicide attempts, a plan for a final attempt, and mentions hearing voices, which could suggest a psychotic disorder.</p></disp-quote><p>These posts are consistent with current theories of suicidality. The ITS, for example, posits that individuals are more likely to die by suicide when they have both the desire to die, often stemming from feelings of burden and social isolation, and the capability to do so, often developed through previous exposure to painful or fear-inducing experiences.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>We combined analytic and interpretability techniques to study linguistic contents in large numbers of postings derived from the r/SuicideWatch subreddit in relation to other mental health subreddits, as well as select non&#x2013;mental health subreddits. This allows us to better understand, both qualitatively and quantitatively, how suicidal ideation linguistically presents in popular social media sites (Reddit). Our results offer new insights into the emotional and thematic content shared by individuals at risk of suicide on Reddit.</p><p>First, by applying GPT-4 to postings spanning the top 3 axes in our dimensionally reduced embeddings across all subreddit postings, we were able to determine the linguistic meanings of these axes, which describe generally what people tend to discuss on anonymous web-based mental health forums, including suicide-related discourse. These axes were (1) resilience versus despair, (2) validation versus advice, and (3) high versus low distress. R/SuicideWatch was characterized by narratives of struggles and despair (eg, hopelessness and previous treatment being ineffective) and showed the highest values of nearly all mental health subreddits for seeking advice (more so than validation) and high levels of distress. R/SuicideWatch posts also had the highest values for seeking guidance and advice&#x2014;just above r/BPD and r/Depression and just below r/SocialAnxiety. These specific communities and those with associated disorders or phenomena may seek advice or guidance (ie, practical solutions) more than others because of their chronicity, recurrence, and historical difficulty to treat. For example, spanning across 50 years of intervention research for suicidal thoughts and behaviors, recent meta-analytic evidence shows that overall intervention effects are small regardless of the intervention or suicide-related outcome studied [<xref ref-type="bibr" rid="ref3">3</xref>]. Further, although effective treatments are available, when left untreated, social anxiety and BPD are often chronic conditions [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>], and depressive disorders are highly recurrent [<xref ref-type="bibr" rid="ref21">21</xref>]. Individuals on Reddit may therefore be searching for additional solutions, potentially because past treatment engagement was not adequately effective in reducing their symptoms. Last, regarding distress, our study is in line with previous studies examining suicide-related social media posts that characterizes these postings as indicating high levels of distress [<xref ref-type="bibr" rid="ref22">22</xref>].</p><p>More generally, these 3 axes exemplify broad common themes during clinical appointments, with people sharing messages of <italic>hope and resilience</italic> or <italic>despair and hopelessness</italic> (referred to as &#x201C;Emotional Well-Being&#x201D; by GPT-4). Similarly, individuals on mental health subreddits mention a desire for <italic>validation or support</italic> and <italic>problem-solving or solutions</italic>, both of which are core to several effective psychosocial treatments. Last, subjective distress represents a core component of what helps define a mental health disorder [<xref ref-type="bibr" rid="ref23">23</xref>] and varies widely from disorder to disorder as well as individually, as indicated by the values along this axis. Together, these axes may indicate that a therapy-like process naturally occurs in Reddit communities, where posters provide messages of despair or resilience with different degrees of distress and are searching for validation and solutions to their experiences.</p><p>Using ProtoDash in conjunction with GPT-4, we extracted and summarized common thematic and emotional contents from r/SuicideWatch postings. Results support the central variables within contemporary theories of suicide for explaining why the desire for suicide develops. Here, the 3 most prototypical posters predominantly wrote about feelings of disconnection, burden, hopelessness, desperation, resignation, and trauma. The ITS, 3ST, and IMV theories of suicide all state that disconnection (eg, thwarted belongingness), perceived burdensomeness, and feeling that their issues are intractable (eg, entrapment, hopelessness, and resignation) are necessary elements for developing a desire to die by suicide. Further, desperation&#x2014;having a deep sense of despair, feeling overwhelmed, and lacking the ability to improve current conditions&#x2014;is consistent with psychache [<xref ref-type="bibr" rid="ref24">24</xref>] and the recently proposed diagnostic criteria for acute suicide conditions (eg, suicide crisis syndrome [<xref ref-type="bibr" rid="ref25">25</xref>]), each of which cite despair as a core criterion for the development of suicidal ideation. Last, experiencing trauma (broadly defined) was frequently discussed in postings. Experiencing traumatic events is not posited as a necessary and sufficient condition for the development of suicidal ideation, but it has been put forth as a contributor for why people become capable of dying by suicide [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. However, previous meta-analytic work has correlated traumatic experiences such as abuse with future suicidal ideation [<xref ref-type="bibr" rid="ref2">2</xref>], and if the term is taken colloquially (ie, negative life stressors), it may contribute to theoretical constructs such as defeat or humiliation [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref25">25</xref>].</p><p>Next, we identified linguistically defined natural groupings among mental health subreddits. Our results seemed to suggest three different clusters: (1) r/SuicideWatch, r/Depression, r/MentalHealth, r/BPD, and r/Social Anxiety; (2) r/Psychosis, r/schizophrenia, and r/Bipolar; and (3) r/alcoholism, r/EDAnonymous, and r/addiction (see <xref ref-type="fig" rid="figure4">Figures 4</xref> and <xref ref-type="fig" rid="figure5">5</xref>). However, the 3 clusters mentioned above appear to (largely) support the superspectra put forth by the Hierarchical Taxonomy of Psychopathology (HiTOP)&#x2014;a recent statistically driven diagnostic classification system for mental disorders [<xref ref-type="bibr" rid="ref26">26</xref>]&#x2014;compared to traditional clusters found in the <italic>Diagnostic Statistical Manual of Mental Disorders, Fifth Edition</italic> [<xref ref-type="bibr" rid="ref23">23</xref>]. The three clusters mentioned above generally correspond to the following HiTOP spectra: (1) Internalizing Disorders; (2) Psychotic Disorders; and (3) Disinhibited Externalization Disorders. However, other mental health subreddits such as r/HealthAnxiety, r/Anxiety, r/mentalillness, and r/PTSD did not map as neatly onto any 1 dimension. In contrast with HiTOP, r/BPD was more aligned with internalizing disorders than externalizing disorders in this study. Similarly, while eating disorders (r/EDAnonymous) and substance or alcohol use disorders (r/addiction) are classified under different HiTOP spectra (Internalizing Disorders and Disinhibited Externalizing Disorders, respectively), their strong phenotypic associations and comorbidities are well documented in the literature (see a recent study examining their shared genetic risks [<xref ref-type="bibr" rid="ref27">27</xref>]). Taken as a whole, our results also support a more nuanced and dimensional view of HiTOP spectra (eg, internalizing vs externalizing disorders), where posters in r/BPD are more often discussing unsatisfying relationships, feelings of emptiness, desires for self-harm, anger, and other cognitive criteria rather than solely discussing externalizing behaviors (eg, physical fights and risky behaviors) that are central to the BPD HiTOP spectra (ie, externalizing disorders).</p><p>As indicated by the hierarchical structure of the dendrogram in <xref ref-type="fig" rid="figure4">Figure 4</xref>, the linguistic features in r/SuicideWatch have substantial overlap with postings in r/Depression, r/BPD, and r/SocialAnxiety. This is likely due to suicidal ideation being a symptom of depression and suicidal (and parasuicidal) behaviors, which are common features of BPD [<xref ref-type="bibr" rid="ref23">23</xref>]. Regarding the (somewhat unexpected) linguistic similarities between postings from r/SuicideWatch and r/SocialAnxiety, we posit that they are likely driven by mentioning social and interpersonal issues (eg, loneliness), which are common among both. These similarity findings may help serve as additional validity for our results and may have implications for recently proposed diagnostic criteria for suicide-related thoughts and behaviors, such as acute suicide affective disorder [<xref ref-type="bibr" rid="ref28">28</xref>] and suicide crisis syndrome [<xref ref-type="bibr" rid="ref25">25</xref>], that highlight abrupt or accentuated feelings of social disconnection or social withdrawal as an indicator of suicide crisis risk.</p><p>From <xref ref-type="fig" rid="figure6">Figure 6</xref>, we note in the top panel that r/BPD had the third highest value on the axis for &#x201C;Understanding and Support,&#x201D; with r/SuicideWatch being the second highest. In the bottom panel, r/BPD has the fourth highest value on the axis for &#x201C;Severity of Distress,&#x201D; next to r/Depression, whereas r/SuicideWatch had the highest value. Individuals with BPD are at a notably higher risk of suicide, with commonalities between the 2 including impulsivity, intense emotional dysregulation, and chronic feelings of emptiness. These individuals often struggle with unstable interpersonal relationships and heightening feelings of loneliness and rejection, which can trigger suicidal thoughts and behaviors. Additionally, a significant proportion of those with BPD have a history of trauma and may have co-occurring mental health disorders, such as depression or anxiety, further exacerbating the risk. The prevalence of self-harm behaviors in individuals with BPD, although not always indicative of suicidal intent, is also a critical risk factor [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>Last, the representative posts selected by ProtoDash and interpreted by ChatGPT showed that r/SuicideWatch discussions emphasized feelings of empathy, support, understanding, and gratitude. In addition, ChatGPT found that typical replies offered messages of hope that things will improve and encouraged seeking help, traveling to beautiful places or to find solace, and connecting with nature to find &#x201C;self love.&#x201D; Many of these actions (eg, empathy, support, understanding, and encouraging help-seeking behavior) are what professional organizations (eg, National Suicide Prevention Lifeline and American Foundation for Suicide Prevention) advocate friends, family, and communities provide to individuals in crisis. Although some of the proposed suggestions do not have an evidence base for being effective in reducing suicidal desire (eg, traveling to beautiful places), survivors of suicide or individuals with lived experience (eg, other members of r/SuicideWatch) may provide additional perspectives that can be helpful when used alongside evidence-based therapies and interventions [<xref ref-type="bibr" rid="ref33">33</xref>]. Overall, these findings indicate that some web-based communities, such as r/SuicideWatch, could be a source of support for many individuals experiencing suicidal thoughts and can act in accordance with the suggestions put forth by several professional suicide prevention organizations.</p></sec><sec id="s4-2"><title>Limitations and Future Directions</title><p>We note a few limitations of our study. First, the data were limited to a 3-month period, which may not be sufficient to fully capture the range of experiences and emotions expressed in the r/SuicideWatch subreddit. Additionally, instead of more broadly looking into other social media platforms, the study focused solely on Reddit, and thus, the findings may not generalize to other web-based platforms. Future research could expand the time frame of data collection; explore other web-based platforms; and integrate additional data sources, such as user comments, to provide a more comprehensive understanding of web-based expressions of suicide risk. Last, the results from this study could not be validated against external criteria such as established measures of suicide risk or clinician judgment, potentially limiting the credibility of our findings. Future studies could incorporate multiple perspectives to help understand the accuracy and reliability of the extracted thematic interpretations.</p></sec><sec id="s4-3"><title>Conclusion</title><p>In conclusion, we used a novel combination of NLP techniques to detect and interpret linguistic patterns of mental health subreddits to better understand how suicidal ideation presents in web-based communities. LLM embeddings allowed for a nuanced analysis of subreddit content that revealed unique patterns and shared themes that are specific to suicide-related content. Further, dimensional reduction revealed latent dimensions of mental health discussions and helped identify relationships between various subreddits. Last, we used generative LLM for XAI to gain deeper insights into the emotions and experiences of individuals posting about suicidal thoughts. Our results supported contemporary theories of suicide. Our study highlights the potential use of web-based linguistic patterns as valuable data sources to better understand mental health disorders and suicidality.</p></sec></sec></body><back><ack><p>We thank the Reddit community for their openness in sharing their experiences and emotions, which made this research possible. We also acknowledge the contributions of ChatGPT (OpenAI) [<xref ref-type="bibr" rid="ref14">14</xref>] for its assistance in data interpretation.</p></ack><notes><sec><title>Data Availability</title><p>The data sets used and analyzed during this study were downloaded from The-Eye.eu [<xref ref-type="bibr" rid="ref16">16</xref>].</p></sec></notes><fn-group><fn fn-type="con"><p>BB, RN, AL, and GC created the study concept. BB, AL, RN and GC were major contributors to writing the manuscript. ZAR provided the code to access Reddit data from The-Eye.eu [<xref ref-type="bibr" rid="ref16">16</xref>]. BW provided the code to access ChatGPT programmatically. RN collected and analyzed the data. All authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>AL is a cofounder of Keywise AI and has served as an adviser or consultant for Otsuka US and Buoy Health. BB, RN, ZAR, BW, and GC have no conflicts of interest.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">3ST</term><def><p>Three-Step Theory</p></def></def-item><def-item><term id="abb2">BERT</term><def><p>Bidirectional Encoder Representations from Transformers</p></def></def-item><def-item><term id="abb3">BPD</term><def><p>borderline personality disorder</p></def></def-item><def-item><term id="abb4">EDAnonymous</term><def><p>eating disorders anonymous</p></def></def-item><def-item><term id="abb5">HiTOP</term><def><p>Hierarchical Taxonomy of Psychopathology</p></def></def-item><def-item><term id="abb6">IMV</term><def><p>Integrated Motivational&#x2013;Volitional</p></def></def-item><def-item><term id="abb7">ITS</term><def><p>Interpersonal Theory of Suicide</p></def></def-item><def-item><term id="abb8">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb9">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb10">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb11">PTSD</term><def><p>posttraumatic stress disorder</p></def></def-item><def-item><term id="abb12">SVD</term><def><p>singular value decomposition</p></def></def-item><def-item><term id="abb13">XAI</term><def><p>explainable artificial intelligence</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garnett</surname><given-names>MF</given-names></name><name name-style="western"><surname>Curtin</surname><given-names>SC</given-names></name></person-group><article-title>Suicide mortality in the United States, 2001-2021</article-title><source>NCHS Data Brief</source><year>2023</year><month>04</month><volume>464</volume><fpage>1</fpage><lpage>8</lpage><pub-id pub-id-type="medline">37093258</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Franklin</surname><given-names>JC</given-names></name><name name-style="western"><surname>Ribeiro</surname><given-names>JD</given-names></name><name name-style="western"><surname>Fox</surname><given-names>KR</given-names></name><etal/></person-group><article-title>Risk factors for suicidal thoughts and behaviors: a meta-analysis of 50 years of research</article-title><source>Psychol Bull</source><year>2017</year><month>02</month><volume>143</volume><issue>2</issue><fpage>187</fpage><lpage>232</lpage><pub-id pub-id-type="doi">10.1037/bul0000084</pub-id><pub-id pub-id-type="medline">27841450</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fox</surname><given-names>KR</given-names></name><name name-style="western"><surname>Huang</surname><given-names>X</given-names></name><name name-style="western"><surname>Guzm&#x00E1;n</surname><given-names>EM</given-names></name><etal/></person-group><article-title>Interventions for suicide and self-injury: a meta-analysis of randomized controlled trials across nearly 50 years of research</article-title><source>Psychol Bull</source><year>2020</year><month>12</month><volume>146</volume><issue>12</issue><fpage>1117</fpage><lpage>1145</lpage><pub-id pub-id-type="doi">10.1037/bul0000305</pub-id><pub-id pub-id-type="medline">33119344</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Low</surname><given-names>DM</given-names></name><name name-style="western"><surname>Rumker</surname><given-names>L</given-names></name><name name-style="western"><surname>Talkar</surname><given-names>T</given-names></name><name name-style="western"><surname>Torous</surname><given-names>J</given-names></name><name name-style="western"><surname>Cecchi</surname><given-names>G</given-names></name><name name-style="western"><surname>Ghosh</surname><given-names>SS</given-names></name></person-group><article-title>Natural language processing reveals vulnerable mental health support groups and heightened health anxiety on Reddit during COVID-19: observational study</article-title><source>J Med Internet Res</source><year>2020</year><month>10</month><day>12</day><volume>22</volume><issue>10</issue><fpage>e22635</fpage><pub-id pub-id-type="doi">10.2196/22635</pub-id><pub-id pub-id-type="medline">32936777</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fineberg</surname><given-names>SK</given-names></name><name name-style="western"><surname>Leavitt</surname><given-names>J</given-names></name><name name-style="western"><surname>Deutsch-Link</surname><given-names>S</given-names></name><etal/></person-group><article-title>Self-reference in psychosis and depression: a language marker of illness</article-title><source>Psychol Med</source><year>2016</year><month>09</month><volume>46</volume><issue>12</issue><fpage>2605</fpage><lpage>2615</lpage><pub-id pub-id-type="doi">10.1017/S0033291716001215</pub-id><pub-id pub-id-type="medline">27353541</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Birnbaum</surname><given-names>ML</given-names></name><name name-style="western"><surname>Norel</surname><given-names>R</given-names></name><name name-style="western"><surname>van Meter</surname><given-names>A</given-names></name><etal/></person-group><article-title>Identifying signals associated with psychiatric illness utilizing language and images posted to Facebook</article-title><source>NPJ Schizophr</source><year>2020</year><month>12</month><day>3</day><volume>6</volume><issue>1</issue><fpage>38</fpage><pub-id pub-id-type="doi">10.1038/s41537-020-00125-0</pub-id><pub-id pub-id-type="medline">33273468</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Orden</surname><given-names>KA</given-names></name><name name-style="western"><surname>Witte</surname><given-names>TK</given-names></name><name name-style="western"><surname>Gordon</surname><given-names>KH</given-names></name><name name-style="western"><surname>Bender</surname><given-names>TW</given-names></name><name name-style="western"><surname>Joiner</surname><given-names>TE Jr</given-names></name></person-group><article-title>Suicidal desire and the capability for suicide: tests of the interpersonal-psychological theory of suicidal behavior among adults</article-title><source>J Consult Clin Psychol</source><year>2008</year><month>02</month><volume>76</volume><issue>1</issue><fpage>72</fpage><lpage>83</lpage><pub-id pub-id-type="doi">10.1037/0022-006X.76.1.72</pub-id><pub-id pub-id-type="medline">18229985</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Klonsky</surname><given-names>ED</given-names></name><name name-style="western"><surname>May</surname><given-names>AM</given-names></name></person-group><article-title>The Three-Step Theory (3ST): a new theory of suicide rooted in the &#x201C;ideation-to-action&#x201D; framework</article-title><source>Int J Cogn Ther</source><year>2015</year><month>06</month><volume>8</volume><issue>2</issue><fpage>114</fpage><lpage>129</lpage><pub-id pub-id-type="doi">10.1521/ijct.2015.8.2.114</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>O&#x2019;Connor</surname><given-names>RC</given-names></name></person-group><article-title>The integrated motivational-volitional model of suicidal behavior</article-title><source>Crisis</source><year>2011</year><volume>32</volume><issue>6</issue><fpage>295</fpage><lpage>298</lpage><pub-id pub-id-type="doi">10.1027/0227-5910/a000120</pub-id><pub-id pub-id-type="medline">21945841</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Le Glaz</surname><given-names>A</given-names></name><name name-style="western"><surname>Haralambous</surname><given-names>Y</given-names></name><name name-style="western"><surname>Kim-Dufor</surname><given-names>DH</given-names></name><etal/></person-group><article-title>Machine learning and natural language processing in mental health: systematic review</article-title><source>J Med Internet Res</source><year>2021</year><month>05</month><day>4</day><volume>23</volume><issue>5</issue><fpage>e15708</fpage><pub-id pub-id-type="doi">10.2196/15708</pub-id><pub-id pub-id-type="medline">33944788</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arowosegbe</surname><given-names>A</given-names></name><name name-style="western"><surname>Oyelade</surname><given-names>T</given-names></name></person-group><article-title>Application of natural language processing (NLP) in detecting and preventing suicide Ideation: a systematic review</article-title><source>Int J Environ Res Public Health</source><year>2023</year><month>01</month><day>13</day><volume>20</volume><issue>2</issue><fpage>1514</fpage><pub-id pub-id-type="doi">10.3390/ijerph20021514</pub-id><pub-id pub-id-type="medline">36674270</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Arnaud</surname><given-names>E</given-names></name><name name-style="western"><surname>Elbattah</surname><given-names>M</given-names></name><name name-style="western"><surname>Moreno-S&#x00E1;nchez</surname><given-names>PA</given-names></name><name name-style="western"><surname>Dequen</surname><given-names>G</given-names></name><name name-style="western"><surname>Ghazali</surname><given-names>DA</given-names></name></person-group><article-title>Explainable NLP model for predicting patient admissions at emergency department using triage notes</article-title><source>2023 IEEE International Conference on Big Data (BigData)</source><year>2023</year><publisher-name>IEEE</publisher-name><fpage>4843</fpage><lpage>4847</lpage><pub-id pub-id-type="doi">10.1109/BigData59044.2023.10386753</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names></name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names></name><name name-style="western"><surname>Lee</surname><given-names>K</given-names></name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names></name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><source>arXiv</source><comment>Preprint posted online on  May 24, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1810.04805</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="web"><article-title>ChatGPT</article-title><source>OpenAI</source><access-date>2024-05-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://chat.openai.com/">https://chat.openai.com/</ext-link></comment></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Gurumoorthy</surname><given-names>KS</given-names></name><name name-style="western"><surname>Dhurandhar</surname><given-names>A</given-names></name><name name-style="western"><surname>Cecchi</surname><given-names>G</given-names></name><name name-style="western"><surname>Aggarwal</surname><given-names>C</given-names></name></person-group><article-title>Efficient data representation by selecting prototypes with importance weights</article-title><source>2019 IEEE International Conference on Data Mining (ICDM)</source><year>2019</year><publisher-name>IEEE</publisher-name><fpage>260</fpage><lpage>269</lpage><pub-id pub-id-type="doi">10.1109/ICDM.2019.00036</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="web"><article-title>Reddit/Pushshift archives</article-title><source>The-Eye.eu</source><access-date>2024-05-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://the-eye.eu/redarcs/">https://the-eye.eu/redarcs/</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Reimers</surname><given-names>N</given-names></name><name name-style="western"><surname>Gurevych</surname><given-names>I</given-names></name></person-group><article-title>Sentence-BERT: sentence embeddings using Siamese BERT-networks</article-title><source>arXiv</source><comment>Preprint posted online on  Aug 27, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1908.10084</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Srivastava</surname><given-names>A</given-names></name><name name-style="western"><surname>Selloni</surname><given-names>A</given-names></name><name name-style="western"><surname>Bilgrami</surname><given-names>ZR</given-names></name><etal/></person-group><article-title>Differential expression of anomalous self-experiences in spontaneous speech in clinical high-risk and early-course psychosis quantified by natural language processing</article-title><source>Biol Psychiatry Cogn Neurosci Neuroimaging</source><year>2023</year><month>10</month><volume>8</volume><issue>10</issue><fpage>1005</fpage><lpage>1012</lpage><pub-id pub-id-type="doi">10.1016/j.bpsc.2023.06.007</pub-id><pub-id pub-id-type="medline">37414359</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Crowell</surname><given-names>SE</given-names></name><name name-style="western"><surname>Beauchaine</surname><given-names>TP</given-names></name><name name-style="western"><surname>Linehan</surname><given-names>MM</given-names></name></person-group><article-title>A biosocial developmental model of borderline personality: elaborating and extending Linehan's theory</article-title><source>Psychol Bull</source><year>2009</year><month>05</month><volume>135</volume><issue>3</issue><fpage>495</fpage><lpage>510</lpage><pub-id pub-id-type="doi">10.1037/a0015616</pub-id><pub-id pub-id-type="medline">19379027</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Keller</surname><given-names>MB</given-names></name></person-group><article-title>Social anxiety disorder clinical course and outcome: review of Harvard/Brown Anxiety Research Project (HARP) findings</article-title><source>J Clin Psychiatry</source><year>2006</year><volume>67</volume><issue>Suppl 12</issue><fpage>14</fpage><lpage>19</lpage><pub-id pub-id-type="medline">17092191</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Burcusa</surname><given-names>SL</given-names></name><name name-style="western"><surname>Iacono</surname><given-names>WG</given-names></name></person-group><article-title>Risk for recurrence in depression</article-title><source>Clin Psychol Rev</source><year>2007</year><month>12</month><volume>27</volume><issue>8</issue><fpage>959</fpage><lpage>985</lpage><pub-id pub-id-type="doi">10.1016/j.cpr.2007.02.005</pub-id><pub-id pub-id-type="medline">17448579</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>O&#x2019;Dea</surname><given-names>B</given-names></name><name name-style="western"><surname>Wan</surname><given-names>S</given-names></name><name name-style="western"><surname>Batterham</surname><given-names>PJ</given-names></name><name name-style="western"><surname>Calear</surname><given-names>AL</given-names></name><name name-style="western"><surname>Paris</surname><given-names>C</given-names></name><name name-style="western"><surname>Christensen</surname><given-names>H</given-names></name></person-group><article-title>Detecting suicidality on Twitter</article-title><source>Internet Interv</source><year>2015</year><month>05</month><volume>2</volume><issue>2</issue><fpage>183</fpage><lpage>188</lpage><pub-id pub-id-type="doi">10.1016/j.invent.2015.03.005</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="book"><person-group person-group-type="author"><collab>American Psychiatric Association</collab></person-group><source>Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition</source><year>2013</year><publisher-name>American Psychiatric Association</publisher-name><pub-id pub-id-type="doi">10.1176/appi.books.9780890425596</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shneidman</surname><given-names>ES</given-names></name></person-group><article-title>Commentary: suicide as psychache</article-title><source>J Nerv Ment Dis</source><year>1993</year><month>03</month><volume>181</volume><issue>3</issue><fpage>145</fpage><lpage>147</lpage><pub-id pub-id-type="doi">10.1097/00005053-199303000-00001</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schuck</surname><given-names>A</given-names></name><name name-style="western"><surname>Calati</surname><given-names>R</given-names></name><name name-style="western"><surname>Barzilay</surname><given-names>S</given-names></name><name name-style="western"><surname>Bloch-Elkouby</surname><given-names>S</given-names></name><name name-style="western"><surname>Galynker</surname><given-names>I</given-names></name></person-group><article-title>Suicide crisis syndrome: a review of supporting evidence for a new suicide-specific diagnosis</article-title><source>Behav Sci Law</source><year>2019</year><month>05</month><volume>37</volume><issue>3</issue><fpage>223</fpage><lpage>239</lpage><pub-id pub-id-type="doi">10.1002/bsl.2397</pub-id><pub-id pub-id-type="medline">30900347</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kotov</surname><given-names>R</given-names></name><name name-style="western"><surname>Krueger</surname><given-names>RF</given-names></name><name name-style="western"><surname>Watson</surname><given-names>D</given-names></name><etal/></person-group><article-title>The Hierarchical Taxonomy of Psychopathology (HiTOP): a quantitative nosology based on consensus of evidence</article-title><source>Annu Rev Clin Psychol</source><year>2021</year><month>05</month><day>7</day><volume>17</volume><fpage>83</fpage><lpage>108</lpage><pub-id pub-id-type="doi">10.1146/annurev-clinpsy-081219-093304</pub-id><pub-id pub-id-type="medline">33577350</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Munn-Chernoff</surname><given-names>MA</given-names></name><name name-style="western"><surname>Johnson</surname><given-names>EC</given-names></name><name name-style="western"><surname>Chou</surname><given-names>YL</given-names></name><etal/></person-group><article-title>Shared genetic risk between eating disorder- and substance-use-related phenotypes: evidence from genome-wide association studies</article-title><source>Addict Biol</source><year>2021</year><month>01</month><volume>26</volume><issue>1</issue><fpage>e12880</fpage><pub-id pub-id-type="doi">10.1111/adb.12880</pub-id><pub-id pub-id-type="medline">32064741</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tucker</surname><given-names>RP</given-names></name><name name-style="western"><surname>Michaels</surname><given-names>MS</given-names></name><name name-style="western"><surname>Rogers</surname><given-names>ML</given-names></name><name name-style="western"><surname>Wingate</surname><given-names>LR</given-names></name><name name-style="western"><surname>Joiner</surname><given-names>TE Jr</given-names></name></person-group><article-title>Construct validity of a proposed new diagnostic entity: acute suicidal affective disturbance (ASAD)</article-title><source>J Affect Disord</source><year>2016</year><month>01</month><day>1</day><volume>189</volume><fpage>365</fpage><lpage>378</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2015.07.049</pub-id><pub-id pub-id-type="medline">26476421</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Paris</surname><given-names>J</given-names></name></person-group><article-title>Suicidality in borderline personality disorder</article-title><source>Medicina</source><year>2019</year><month>05</month><day>28</day><volume>55</volume><issue>6</issue><fpage>223</fpage><pub-id pub-id-type="doi">10.3390/medicina55060223</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xu</surname><given-names>YE</given-names></name><name name-style="western"><surname>Barron</surname><given-names>DA</given-names></name><name name-style="western"><surname>Sudol</surname><given-names>K</given-names></name><name name-style="western"><surname>Zisook</surname><given-names>S</given-names></name><name name-style="western"><surname>Oquendo</surname><given-names>MA</given-names></name></person-group><article-title>Suicidal behavior across a broad range of psychiatric disorders</article-title><source>Mol Psychiatry</source><year>2023</year><month>07</month><volume>28</volume><issue>7</issue><fpage>2764</fpage><lpage>2810</lpage><pub-id pub-id-type="doi">10.1038/s41380-022-01935-7</pub-id><pub-id pub-id-type="medline">36653675</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Scheunemann</surname><given-names>J</given-names></name><name name-style="western"><surname>K&#x00FC;hn</surname><given-names>S</given-names></name><name name-style="western"><surname>Biedermann</surname><given-names>SV</given-names></name><etal/></person-group><article-title>Implicit cognitions on self-injurious and suicidal behavior in borderline personality disorder</article-title><source>J Behav Ther Exp Psychiatry</source><year>2023</year><month>06</month><volume>79</volume><fpage>101836</fpage><pub-id pub-id-type="doi">10.1016/j.jbtep.2023.101836</pub-id><pub-id pub-id-type="medline">36709601</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Toma</surname><given-names>S</given-names></name><name name-style="western"><surname>Sinyor</surname><given-names>M</given-names></name><name name-style="western"><surname>Mitchell</surname><given-names>RHB</given-names></name><name name-style="western"><surname>Schaffer</surname><given-names>A</given-names></name></person-group><article-title>Transdiagnostic suicidality in depression: more similar than different</article-title><source>Acta Psychiatr Scand</source><year>2023</year><month>09</month><volume>148</volume><issue>3</issue><fpage>219</fpage><lpage>221</lpage><pub-id pub-id-type="doi">10.1111/acps.13600</pub-id><pub-id pub-id-type="medline">37605854</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Watling</surname><given-names>D</given-names></name><name name-style="western"><surname>Preece</surname><given-names>M</given-names></name><name name-style="western"><surname>Hawgood</surname><given-names>J</given-names></name><name name-style="western"><surname>Bloomfield</surname><given-names>S</given-names></name><name name-style="western"><surname>K&#x00F5;lves</surname><given-names>K</given-names></name></person-group><article-title>Developing an intervention for suicide prevention: a rapid review of lived experience involvement</article-title><source>Arch Suicide Res</source><year>2022</year><volume>26</volume><issue>2</issue><fpage>465</fpage><lpage>480</lpage><pub-id pub-id-type="doi">10.1080/13811118.2020.1833799</pub-id><pub-id pub-id-type="medline">33073734</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Proportion of posts from a given subreddit &#x201C;closest&#x201D; to the r/SuicideWatch centroid.</p><media xlink:href="mental_v11i1e57234_app1.pdf" xlink:title="PDF File, 13 KB"/></supplementary-material></app-group></back></article>