<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Ment Health</journal-id>
      <journal-title>JMIR Mental Health</journal-title>
      <issn pub-type="epub">2368-7959</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i12e39747</article-id>
      <article-id pub-id-type="pmid">36583932</article-id>
      <article-id pub-id-type="doi">10.2196/39747</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Cross-Platform Detection of Psychiatric Hospitalization via Social Media Data: Comparison Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Torous</surname>
            <given-names>John</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zulueta</surname>
            <given-names>John</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hudon</surname>
            <given-names>Alexandre</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Rekhi</surname>
            <given-names>Gurpreet</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>Viet Cuong</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>School of Interactive Computing</institution>
            <institution>Georgia Institute of Technology</institution>
            <addr-line>756 W Peachtree St NW</addr-line>
            <addr-line>Atlanta, GA, 30318</addr-line>
            <country>United States</country>
            <phone>1 404 279 2941</phone>
            <email>johnny.nguyen@gatech.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8504-9350</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>Nathaniel</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9695-2249</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Kane</surname>
            <given-names>John M</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2628-9442</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Birnbaum</surname>
            <given-names>Michael L</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4285-7868</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>De Choudhury</surname>
            <given-names>Munmun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8939-264X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Interactive Computing</institution>
        <institution>Georgia Institute of Technology</institution>
        <addr-line>Atlanta, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Psychiatry</institution>
        <institution>The Zucker Hillside Hospital</institution>
        <institution>Northwell Health</institution>
        <addr-line>Glen Oaks, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>The Feinstein Institute for Medical Research</institution>
        <institution>Northwell Health</institution>
        <addr-line>Manhasset, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>The Donald and Barbara Zucker School of Medicine at Hofstra/Northwell</institution>
        <addr-line>Hempstead, NY</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Viet Cuong Nguyen <email>johnny.nguyen@gatech.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>12</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>30</day>
        <month>12</month>
        <year>2022</year>
      </pub-date>
      <volume>9</volume>
      <issue>12</issue>
      <elocation-id>e39747</elocation-id>
      <history>
        <date date-type="received">
          <day>31</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>17</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>6</day>
          <month>10</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>28</day>
          <month>10</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Viet Cuong Nguyen, Nathaniel Lu, John M Kane, Michael L Birnbaum, Munmun De Choudhury. Originally published in JMIR Mental Health (https://mental.jmir.org), 30.12.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Mental Health, is properly cited. The complete bibliographic information, a link to the original publication on https://mental.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://mental.jmir.org/2022/12/e39747" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Previous research has shown the feasibility of using machine learning models trained on social media data from a single platform (eg, Facebook or Twitter) to distinguish individuals either with a diagnosis of mental illness or experiencing an adverse outcome from healthy controls. However, the performance of such models on data from novel social media platforms unseen in the training data (eg, Instagram and TikTok) has not been investigated in previous literature.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Our study examined the feasibility of building machine learning classifiers that can effectively predict an upcoming psychiatric hospitalization given social media data from platforms unseen in the classifiers’ training data despite the preliminary evidence on identity fragmentation on the investigated social media platforms.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Windowed timeline data of patients with a diagnosis of schizophrenia spectrum disorder before a known hospitalization event and healthy controls were gathered from 3 platforms: Facebook (254/268, 94.8% of participants), Twitter (51/268, 19% of participants), and Instagram (134/268, 50% of participants). We then used a 3 × 3 combinatorial binary classification design to train machine learning classifiers and evaluate their performance on testing data from all available platforms. We further compared results from models in intraplatform experiments (ie, training and testing data belonging to the same platform) to those from models in interplatform experiments (ie, training and testing data belonging to different platforms). Finally, we used Shapley Additive Explanation values to extract the top predictive features to explain and compare the underlying constructs that predict hospitalization on each platform.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We found that models in intraplatform experiments on average achieved an <italic>F</italic><sub>1</sub>-score of 0.72 (SD 0.07) in predicting a psychiatric hospitalization because of schizophrenia spectrum disorder, which is 68% higher than the average of models in interplatform experiments at an <italic>F</italic><sub>1</sub>-score of 0.428 (SD 0.11). When investigating the key drivers for divergence in construct validities between models, an analysis of top features for the intraplatform models showed both low predictive feature overlap between the platforms and low pairwise rank correlation (&#60;0.1) between the platforms’ top feature rankings. Furthermore, low average cosine similarity of data between platforms within participants in comparison with the same measurement on data within platforms between participants points to evidence of identity fragmentation of participants between platforms.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We demonstrated that models built on one platform’s data to predict critical mental health treatment outcomes such as hospitalization do not generalize to another platform. In our case, this is because different social media platforms consistently reflect different segments of participants’ identities. With the changing ecosystem of social media use among different demographic groups and as web-based identities continue to become fragmented across platforms, further research on holistic approaches to harnessing these diverse data sources is required.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>schizophrenia</kwd>
        <kwd>mental health</kwd>
        <kwd>machine learning</kwd>
        <kwd>clinical informatics</kwd>
        <kwd>social media</kwd>
        <kwd>mobile phone</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Despite its relatively low prevalence compared with other mental health disorders, the burden of schizophrenia spectrum disorder (SSD) on patients, families, and society is substantial [<xref ref-type="bibr" rid="ref1">1</xref>]. To mitigate the burden of SSD, early diagnosis and treatment are crucial. However, psychotic disorders, including SSD, often receive delayed attention and care, resulting in worse health outcomes [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. At the same time, the use of social media is high among patients with serious psychotic disorders such as SSD, especially among adolescents and young adults, when SSD typically emerges [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. For instance, Birnbaum et al [<xref ref-type="bibr" rid="ref4">4</xref>] studied social media use among adolescents and young adults with psychotic and mood disorders and found that 97.5% of participants (mean age 18.3 years) regularly used social media, spending approximately 2.6 (SD 2.5) hours per day on the web. Similarly, Miller et al [<xref ref-type="bibr" rid="ref5">5</xref>] studied the use of digital technologies among patients diagnosed with SSD and found that, among participants with access to the internet, 98% reported using at least one social media service and 57% used social media daily.</p>
        <p>Given this information, there has been an established body of research on using social media data to identify and predict psychiatric outcomes of social media users with SSD using machine learning classifiers [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. The most robust data sources available to train these classifiers consist of textual content posted on the web. Prior work in speech and text analysis among patients with SSD has identified reliable linguistic markers associated with SSD, which have been successfully used as features for the aforementioned classifiers [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. These include certain word frequencies, word categories, and self-referential pronouns [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Given that the use of image- and video-based social media platforms such as Instagram, Snapchat, and TikTok is associated with youths, there has also been prior work in the analysis of images comparing between patients with SSD and healthy controls [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Hänsel et al [<xref ref-type="bibr" rid="ref14">14</xref>] identified additional image markers associated with SSD, such as the image’s colorfulness and saturation and the average number of faces per image. By exploiting these markers, previous research conducted by Birnbaum et al [<xref ref-type="bibr" rid="ref15">15</xref>] and Ernala et al [<xref ref-type="bibr" rid="ref8">8</xref>] built classifiers to distinguish between users with a confirmed diagnosis of SSD and healthy controls on Facebook and Twitter with area under the receiver operating characteristic curve (AUROC) scores of 0.75 and 0.82, respectively.</p>
        <p>Although such results demonstrate the potential of automated techniques in predicting the mental health outcomes of individuals with SSD via social media data, many research gaps remain that need to be addressed before psychiatrists can reliably deploy such techniques for clinical purposes. Most prior work in this area primarily focused on a single source of social media data, either exclusively from Twitter or Facebook, for downstream classification and analysis tasks [<xref ref-type="bibr" rid="ref16">16</xref>]. However, previous research has also shown that many social media users, especially youths, use different social media platforms for different purposes because of their variety in affordances and culture. Among youths, Facebook use is associated with keeping up with close and distant friends, whereas Instagram and Snapchat use is associated with self-expression and gratification [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. In addition, researchers have argued that social media users have fragmented identities across platforms [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. Therefore, using a single source of social media data to build psychiatric hospitalization prediction models may potentially lead to low-sensitivity prediction models, making them unsuitable for clinical purposes. However, few studies have quantified the extent to which classifiers trained on data from one social media platform are generalizable to other platforms. To this end, our study aimed to measure the generalizability of social media–based classifiers aimed at predicting upcoming psychiatric hospitalizations to data from unseen social media platforms. In addition, we aimed to surface any evidence of the differing fragmented identities that are reflected on 3 popular social media platforms—Twitter, Facebook, and Instagram—that might affect the models’ generalizability.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>The research question we attempted to answer was as follows: given the preliminary evidence of fragmented identities that are reflected on the investigated social media platforms, can we build classifiers that can effectively detect users at risk of an upcoming psychiatric hospitalization using social media data from platforms unseen in the training data?</p>
        <p>To answer our research question, we collated textual and image content (if available) from consenting participants’ social media data from Facebook, Twitter, and Instagram. We then trained platform-specific classifiers to distinguish between social media data from healthy controls and data from patients with SSD with an upcoming psychiatric hospitalization. We compared the performance of classifiers on testing data between seen and unseen social media platforms from the training data. We also compared and analyzed the top predictive features and the feature importance distributions between the 3 platform-specific classifiers, with a view toward finding potential empirical evidence for fragmented identities between the various social media platforms.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Recruitment</title>
        <p>We recruited participants clinically diagnosed with SSD and clinically verified healthy controls aged between 15 and 35 years. These data were collected as part of a broader research initiative involving the authors of this paper to identify technology-based health information to provide early identification, intervention, and treatment for young adults with SSD [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>For participants with SSD aged between 15 and 35 years (141/268, 52.6%), diagnoses were based on clinical assessment of the most recent episode and were extracted from participants’ medical records at the time of their consent. Participants in this group were recruited from the Northwell Health Zucker Hillside Hospital and collaborating institutions located in East Lansing, Michigan. Participants were excluded if they had an IQ of &#60;70 (per clinical assessment), autism spectrum disorder, or substance-induced psychotic disorder.</p>
        <p>In addition, healthy volunteers aged between 15 and 35 years (127/268, 47.4%) were approached and recruited from an existing database of eligible individuals who had already been screened for previous research projects at Zucker Hillside Hospital and had agreed to be recontacted for additional research opportunities. Healthy status was determined by either the Structured Clinical Interview for the Diagnostic and Statistical Manual of Mental Disorders conducted within the past 2 years or the Psychiatric Diagnostic Screening Questionnaire [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Participants were excluded if clinically significant psychiatric symptoms were identified during the screening process. Additional healthy volunteers were recruited from a southeastern university via a web-based student community research recruitment site. Finally, healthy volunteers were also recruited from the collaborating institutions located in East Lansing, Michigan.</p>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>All consenting participants were asked to download and share their Facebook, Twitter, and Instagram data archives. We collected all linguistic content from participants’ Facebook and Twitter archives (ie, status updates and comments on Facebook and posts shared on Twitter). In addition, we collected image content from participants’ Facebook and Instagram archives, including profile pictures and story photos.</p>
        <p>Next, we also collected the medical history of each participant (following consent and adoption of Health Insurance Portability and Accountability Act–compliant policies). This included primary and secondary diagnosis codes, the total number of hospitalizations, and admission and discharge dates for each hospitalization event. Hospitalization data were collected from the medical records at the time of consent. As all consented patient participants in the study had also received care at the Zucker Hillside Hospital, the medical records at the hospital were accurate and up to date to the best of the hospital’s efforts. We only counted psychiatric hospitalizations (not hospitalizations for other nonpsychiatric reasons). Thereafter, the study team accessed the corresponding consented patients’ medical records to extract all their recorded hospitalization events in a similar manner to previous studies using this source of data [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref23">23</xref>].</p>
        <p>Finally, for each participant with at least one known hospitalization event, we collected social media data from all available platforms within the 6-month window before the latest hospitalization event, ensuring that there were no hospitalization events within these 6 months. This was done to ensure that the data gathered were representative of the participants’ healthy mental status before symptomatic exacerbation and subsequent hospitalization. A 6-month period, which we refer to as the <italic>windowed data</italic>, was selected as it represents an interval of time long enough to identify changes signaling symptomatic exacerbation while also containing sufficient data required to train machine learning models. For healthy control participants without any hospitalizations, we randomly sampled a nonempty 6-month window of social media data for each available social media platform (nonempty meaning that there was at least some social media activity). <xref rid="figure1" ref-type="fig">Figure 1</xref> provides a visual description of the windowing process.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Diagram representing the windowing process used to gather participants’ social media data before hospitalization events. Bold text represents the selected data windows. Crosses represent hospitalization events. The X represents invalid data windows. A: Windowing—with hospitalizations; B: Windowing—without hospitalizations.</p>
          </caption>
          <graphic xlink:href="mental_v9i12e39747_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Feature Engineering</title>
        <p>To encode participants’ social media data for the downstream classification and analysis tasks outlined in our research objectives, we identified and extracted the following categories of features from these data for all 3 investigated social media platforms: (1) n-gram language features (n=500), (2) Linguistic Inquiry and Word Count features (n=78), (3) lexico-semantic features (n=3), (4) activity features (n=9), and (5) image features (n=23; Instagram and Facebook only).</p>
        <p>The specific feature categories were chosen based on relevant previous literature, particularly relating to the use of social media data to infer mental health attributes and psychiatric outcomes [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Note that all features were computed at the individual participant level. More details about this process can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref29">29</xref>].</p>
      </sec>
      <sec>
        <title>Feature Selection</title>
        <p>Using the aforementioned features, for each of the 3 examined social media platforms, we encoded available participants’ textual and image data on Facebook and Instagram into 613-dimensional feature vectors and textual data on Twitter into 590-dimensional feature vectors. This yielded a Facebook data set of dimension 254 × 613, a Twitter data set of dimension 51 × 590, and an Instagram data set of dimension 134 × 613. We shall refer to these data sets as F, T, and I for Facebook, Twitter, and Instagram, respectively.</p>
        <p>As the feature set might contain features that are noisy and irrelevant, the classification models may be unstable and produce suboptimal results [<xref ref-type="bibr" rid="ref30">30</xref>]. To maximize the predictive power of the models while also reducing the redundancy and computational resources needed to train them, feature selection methods were used [<xref ref-type="bibr" rid="ref30">30</xref>]. More specifically, we adopted the ANOVA <italic>F</italic> test to rank the features based on their <italic>F</italic> statistic under the test, which has been shown to produce optimal feature sets in previous research on the classification of social media data belonging to patients with SSD [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>We trained a random forest model, with 5-fold stratified cross-validation to fine-tune hyperparameters, on data sets F, T, and I with an 80:20 train-test split, using only the top <italic>k</italic> percent of features based on the ranking given by the ANOVA <italic>F</italic> test on the classification, where <italic>k</italic> is between 10 and 100 in increments of 10. Via an examination of the evaluation metrics on the test sets (described in the Classification Algorithms and Metrics section), we determined that using only the top 20% of the features (based on their <italic>F</italic> statistic under the ANOVA <italic>F</italic> test) yielded the best results on unseen data across all 3 platforms. We will be using this subset of features moving forward.</p>
      </sec>
      <sec>
        <title>Combinatorial Classification Methods</title>
        <p>To answer the research question laid out in the Introduction section, we adopted a 3 × 3 combinatorial classification design, where we trained and tested machine learning models on the psychiatric hospitalization prediction task using all possible pairs of training and testing data sets. <xref rid="figure2" ref-type="fig">Figure 2</xref> provides a visual description of our experimental design. For intraplatform experiments (where the training and testing data came from the same platform; eg, training and testing on Facebook data), we trained and tested the models on an 80:20 label-stratified train-test split using the Scikit-learn <italic>train_test_split()</italic> function (version 0.24.1) [<xref ref-type="bibr" rid="ref31">31</xref>]. For interplatform experiments (where the training and testing data came from different platforms; eg, training on Facebook data and testing on Instagram data), we trained the model on the entirety of the training data set and evaluated it on the entirety of the testing data set.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Diagram representing the classification experiments performed and their nature within the 3 × 3 combinatorial design.</p>
          </caption>
          <graphic xlink:href="mental_v9i12e39747_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Classification Algorithms and Metrics</title>
        <p>For both intra- and interplatform experiments, training data represented by the top 20% of features (as described in the Feature Selection section) were fed into a model to learn the classification task. We tried training the model over several algorithms, including random forest, logistic regression, support vector machine, and multilayer perceptron [<xref ref-type="bibr" rid="ref32">32</xref>]. We selected these algorithms as they represented a variety of different types of learning algorithms [<xref ref-type="bibr" rid="ref32">32</xref>]. This ensured that our analysis of performance differences between intra- and interplatform experiments would hold irrespective of the learning algorithm selection. We used the Scikit-learn implementation (version 0.24.1) for all the aforementioned algorithms [<xref ref-type="bibr" rid="ref31">31</xref>]. For each algorithm, we fine-tuned its hyperparameters using 5-fold stratified cross-validation via the Scikit-learn <italic>GridSearchCV()</italic> pipeline, retaining the best hyperparameters per algorithm for analysis [<xref ref-type="bibr" rid="ref31">31</xref>]. The chosen hyperparameters for each classification algorithm are provided in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> (all other hyperparameters were left as default according to the Scikit-learn specification).</p>
        <p>We measured the performance of the models using the metrics outlined in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>, all of which are commonly used in binary classification models. In this case, we abbreviated the number of true positives, true negatives, false positives, and false negatives as TP, TN, FP, and FN, respectively [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
        <boxed-text id="box1" position="float">
          <title>Hyperparameters chosen for each classification algorithm.</title>
          <p>
            <bold>Random forest</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>max_depth: 15</p>
            </list-item>
            <list-item>
              <p>n_estimators: 100</p>
            </list-item>
            <list-item>
              <p>max_features: none</p>
            </list-item>
          </list>
          <p>
            <bold>Logistic regression</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Penalty: l2</p>
            </list-item>
            <list-item>
              <p>C: 0.1</p>
            </list-item>
          </list>
          <p>
            <bold>Support vector machine</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Kernel: rbf</p>
            </list-item>
            <list-item>
              <p>C: 0.01</p>
            </list-item>
            <list-item>
              <p>Gamma: scale</p>
            </list-item>
          </list>
          <p>
            <bold>Multilayer perceptron</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>alpha: 0.0001</p>
            </list-item>
            <list-item>
              <p>hidden_layer_sizes: (512, 256, 128)</p>
            </list-item>
          </list>
        </boxed-text>
        <boxed-text id="box2" position="float">
          <title>Metrics used to measure model performance.</title>
          <p>
            <bold>Accuracy</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Also known as Rand accuracy, the ratio of correct predictions to all predictions</p>
            </list-item>
            <list-item>
              <p>
                <disp-formula>
                  <graphic xlink:href="mental_v9i12e39747_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
            </list-item>
          </list>
          <p>
            <bold>Precision</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>The ratio of correct positive predictions to the total number of positive predictions</p>
            </list-item>
            <list-item>
              <p>
                <disp-formula>
                  <graphic xlink:href="mental_v9i12e39747_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
            </list-item>
          </list>
          <p>
            <bold>Recall</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>The ratio of correct positive predictions to the total number of actual positive instances</p>
            </list-item>
            <list-item>
              <p>
                <disp-formula>
                  <graphic xlink:href="mental_v9i12e39747_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
            </list-item>
          </list>
          <p>
            <bold><italic>F</italic><sub>1</sub>-score</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>The harmonic mean of precision and recall</p>
            </list-item>
            <list-item>
              <p>
                <disp-formula>
                  <graphic xlink:href="mental_v9i12e39747_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
            </list-item>
          </list>
          <p>
            <bold>Area under the receiver operating characteristic curve (AUROC)</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>The area under the receiver operating characteristic curve, which plots the true positive rate against the false positive rate; in practice, it is often estimated using the trapezoidal rule with the following formula:</p>
            </list-item>
            <list-item>
              <p>
                <disp-formula>
                  <graphic xlink:href="mental_v9i12e39747_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Feature Importance Selection</title>
        <p>We used Shapley Additive Explanations (SHAP) to examine how certain features affected our model’s decision to predict users with potential psychiatric hospitalization because of SSD given their social media data from the 3 inspected social media platforms. Our decision to use SHAP rather than other explainability methods stems from the fact that SHAP is not only model-agnostic but also the most theoretically sound explainability framework among the available options. This is because SHAP feature scores can be calculated for localized samples and for the entire global data set [<xref ref-type="bibr" rid="ref34">34</xref>]. SHAP is based on Shapley values, a game-theoretical concept that intuitively describes each feature’s contribution to the outcome after considering all possible combinations of features [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
        <p>For each of the intraplatform experiments within the 3 × 3 combinatorial design and each machine learning model, we calculated the average SHAP values for each of the features (ie, their importance to the prediction) across all instances within the testing set. We then recorded the list of features sorted in descending order according to the average SHAP values measured by each model. In the case of models with native support for feature importance extraction, including random forest (Gini importance) and logistic regression (feature coefficients), we also calculated and recorded these native importance scores in the same manner as the SHAP values.</p>
      </sec>
      <sec>
        <title>Robustness Checks</title>
        <p>To ensure that our findings regarding differences in model performance between models and between intra- and interplatform experiments still held when certain aspects of the training and testing data sets were made more ideal, we performed several robustness checks, which are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>The study was approved by the institutional review board of Northwell Health (the coordinating institution) and the institutional review board of the participating partners (Georgia Tech approval H21403). Participants were recruited from June 23, 2016, to December 4, 2020. Written informed consent was obtained from adult participants and legal guardians of participants aged &#60;18 years. Assent was obtained from participating minors.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Characteristics</title>
        <p>In total, 268 participants (mean age 24.73, SD 5.64 years; male: 127/268, 47.4%; SSD: 141/268, 52.6%) with nonempty windowed data for at least one platform were included. Of these 268 participants, 254 (94.8%; SSD: 133/254, 52.4%) had valid windowed Facebook data, 51 (19%; SSD: 7/51, 13.7%) had valid windowed Twitter data, and 134 (50%; SSD: 42/134, 31.3%) had valid windowed Instagram data. Among participants with valid data for more than one platform, 17.5% (47/268; SSD: 5/47, 10.6%) had valid data for both Facebook and Twitter, 14.2% (38/268; SSD: 4/38, 10.5%) had valid data for both Twitter and Instagram, and 44.4% (119/268; SSD: 34/119, 28.6%) had valid data for both Facebook and Instagram. Finally, 14.2% (38/268; SSD: 4/38, 10.5%) of participants had valid data for all 3 platforms. <xref ref-type="table" rid="table1">Table 1</xref> shows the demographic and clinical characteristics of these 268 participants. <xref ref-type="table" rid="table2">Table 2</xref> describes the summary statistics, including mean and median, for these windowed data for each of the 3 social media platforms grouped by clinical status (SSD vs control). <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the distribution of available posts for participants in each of the 3 investigated platforms.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Demographic and clinical characteristics of the participants (N=268).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristic</td>
                <td colspan="2">SSD<sup>a</sup> (n=141)</td>
                <td colspan="2">Control (n=127)</td>
                <td>Full sample</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Age (years), mean (SD)</td>
                <td colspan="2">24.86 (5.49)</td>
                <td colspan="2">24.57 (5.82)</td>
                <td>24.73 (5.64)</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Sex, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td colspan="2">89 (63.1)</td>
                <td colspan="2">38 (29.9)</td>
                <td colspan="2">127 (47.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td colspan="2">52 (36.9)</td>
                <td colspan="2">89 (70.1)</td>
                <td colspan="2">141 (52.6)</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Race or ethnicity, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>African American or Black</td>
                <td colspan="2">64 (45.4)</td>
                <td colspan="2">19 (15)</td>
                <td colspan="2">83 (31)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Asian</td>
                <td colspan="2">20 (14.2)</td>
                <td colspan="2">23 (18.1)</td>
                <td colspan="2">43 (16)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White</td>
                <td colspan="2">37 (26.2)</td>
                <td colspan="2">75 (59.1)</td>
                <td colspan="2">112 (41.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mixed race or other</td>
                <td colspan="2">15 (10.6)</td>
                <td colspan="2">5 (3.9)</td>
                <td colspan="2">20 (7.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic</td>
                <td colspan="2">5 (3.5)</td>
                <td colspan="2">4 (3.1)</td>
                <td colspan="2">9 (3.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pacific Islander</td>
                <td colspan="2">0 (0)</td>
                <td colspan="2">1 (0.8)</td>
                <td colspan="2">1 (0.4)</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Primary diagnosis, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Schizophrenia</td>
                <td colspan="2">67 (47.5)</td>
                <td colspan="2">N/A<sup>b</sup></td>
                <td colspan="2">67 (25)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Schizophreniform</td>
                <td colspan="2">26 (18.4)</td>
                <td colspan="2">N/A</td>
                <td colspan="2">26 (9.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Schizoaffective</td>
                <td colspan="2">25 (17.7)</td>
                <td colspan="2">N/A</td>
                <td colspan="2">25 (9.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unspecified SSDs</td>
                <td colspan="2">23 (16.3)</td>
                <td colspan="2">N/A</td>
                <td colspan="2">23 (8.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No diagnosis</td>
                <td colspan="2">N/A</td>
                <td colspan="2">127 (100)</td>
                <td colspan="2">127 (47.4)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>SSD: schizophrenia spectrum disorder.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Summary statistics for windowed data for both the control class and the schizophrenia spectrum disorder (SSD) class (ie, participants hospitalized with SSD). In this table, we consider data from Facebook, Twitter, and Instagram, as mentioned previously.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="120"/>
            <col width="120"/>
            <col width="0"/>
            <col width="120"/>
            <col width="120"/>
            <col width="0"/>
            <col width="120"/>
            <col width="210"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">Facebook (user: n=254; post: n=169,425)</td>
                <td colspan="3">Twitter (user: n=51; post: n=23,777)</td>
                <td colspan="2">Instagram (user: n=134; post: n=23,551)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SSD class</td>
                <td>Control class</td>
                <td colspan="2">SSD class</td>
                <td>Control class</td>
                <td colspan="2">SSD class</td>
                <td>Control class</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Total users, n (%)</td>
                <td>133 (52)</td>
                <td>121 (48)</td>
                <td colspan="2">7 (14)</td>
                <td>44 (86)</td>
                <td colspan="2">42 (31)</td>
                <td>92 (69)</td>
              </tr>
              <tr valign="top">
                <td>Total posts, n (%)</td>
                <td>114,793 (68)</td>
                <td>54,632 (32)</td>
                <td colspan="2">991 (4)</td>
                <td>22,786 (96)</td>
                <td colspan="2">7111 (30)</td>
                <td>16,440 (70)</td>
              </tr>
              <tr valign="top">
                <td>Posts, mean (SD)</td>
                <td>863.1 (2365.1)</td>
                <td>451.5 (818.87)</td>
                <td colspan="2">141.6 (255)</td>
                <td>519.9 (1166.9)</td>
                <td colspan="2">169.3 (445.4)</td>
                <td>178.7 (234.6)</td>
              </tr>
              <tr valign="top">
                <td>Posts, median</td>
                <td>260</td>
                <td>184</td>
                <td colspan="2">37</td>
                <td>138</td>
                <td colspan="2">54.5</td>
                <td>103</td>
              </tr>
              <tr valign="top">
                <td>Posts, range</td>
                <td>2-23,589</td>
                <td>1-4852</td>
                <td colspan="2">1-758</td>
                <td>1-7056</td>
                <td colspan="2">1-2909</td>
                <td>1-1328</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Cumulative distribution function (CDF) curves of users and their number of posts for the schizophrenia spectrum disorder and control classes per data set: (A) Facebook (left), (B) Twitter (center), and (C) Instagram (right).</p>
          </caption>
          <graphic xlink:href="mental_v9i12e39747_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Results of Combinatorial Classification</title>
        <p>We report the full results of the intraplatform experiments in <xref ref-type="table" rid="table3">Table 3</xref>. We also report the full results of the interplatform experiments in <xref ref-type="table" rid="table4">Tables 4</xref> to <xref ref-type="table" rid="table6">6</xref>. Finally, we report the receiver operating characteristic curves for the best-performing logistic regression model for the experiments from <xref ref-type="table" rid="table3">Tables 3</xref> to <xref ref-type="table" rid="table6">6</xref> in <xref rid="figure4" ref-type="fig">Figure 4</xref>.</p>
        <p>Elaborating on the results from <xref ref-type="table" rid="table3">Table 3</xref>, we found that, among the 4 classification algorithms that we used, the logistic regression model performed the best in each of the 3 intraplatform experiments. Specifically, for the intraplatform experiments, performance peaked with the logistic regression model, which achieved an average <italic>F</italic><sub>1</sub>-score of 0.72 (SD 0.07), accuracy of 0.81 (SD 0.08), and AUROC of 0.749 (SD 0.06). In contrast, the worst-performing model (in this case, multilayer perceptron) achieved an average <italic>F</italic><sub>1</sub>-score of 0.521 (SD 0.19), accuracy of 0.714 (SD 0.19), and AUROC of 0.623 (SD 0.16) for the intraplatform experiments. Thus, we used the logistic regression model for the subsequent analysis of feature importance between platforms. These results align with previous research and, thus, could be considered a soft replication of those findings [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <p>By contrast, when the metrics for the interplatform experiments presented in <xref ref-type="table" rid="table4">Tables 4</xref> to <xref ref-type="table" rid="table6">6</xref> were aggregated, the average <italic>F</italic><sub>1</sub>-score decreased to 0.428 (SD 0.11), accuracy decreased to 0.559 (SD 0.06), and AUROC decreased to 0.533 (SD 0.03) for the logistic regression model. This constitutes, on average, a drop of 40%, 31.4%, and 28.8% in <italic>F</italic><sub>1</sub>-score, accuracy, and AUROC score, respectively, from the intraplatform experiments. Thus, when comparing the effectiveness of models between intraplatform and interplatform experiments, we found a consistent drop in performance for all the investigated social media platforms. The drop in test <italic>F</italic><sub>1</sub>-score, given the best-performing logistic regression model, was the most drastic for Facebook at 0.364 (46%) and least drastic for Twitter at 0.08 (14%), averaging a drop of 0.285 (40%, SD 0.13) going from 0.713 for intraplatform experiments to 0.428 for interplatform experiments. Such trends hold even when disparities in data set size and dual-platform data availability (as described in the Methods section under Robustness Checks) are accounted for in the training and testing data (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Classification results for all intraplatform classification experiments. Each platform column group reports the experiment in which the model was both trained and tested on that platform (eg, Facebook indicates the Facebook-Facebook experiment).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="80"/>
            <col width="0"/>
            <col width="50"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="70"/>
            <col width="0"/>
            <col width="50"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td colspan="6">Facebook</td>
                <td colspan="6">Twitter</td>
                <td colspan="5">Instagram</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Acc<sup>a</sup></td>
                <td>P<sup>b</sup></td>
                <td>R<sup>c</sup></td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>AUROC<sup>d</sup></td>
                <td colspan="2">Acc</td>
                <td>P</td>
                <td>R</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>AUROC</td>
                <td colspan="2">Acc</td>
                <td>P</td>
                <td>R</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>AUROC</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Random forest</td>
                <td>0.739</td>
                <td>0.739</td>
                <td>0.738</td>
                <td>0.738</td>
                <td>0.709</td>
                <td colspan="2">0.745</td>
                <td>0.150</td>
                <td>0.116</td>
                <td>0.116</td>
                <td>0.494</td>
                <td colspan="2">0.7</td>
                <td>0.648</td>
                <td>0.637</td>
                <td>0.637</td>
                <td>0.681</td>
              </tr>
              <tr valign="top">
                <td>SVM<sup>e</sup></td>
                <td>0.722</td>
                <td>0.747</td>
                <td>0.692</td>
                <td>0.715</td>
                <td>0.723</td>
                <td colspan="2">0.854</td>
                <td>0.541</td>
                <td>0.45</td>
                <td>0.463</td>
                <td>0.697</td>
                <td colspan="2">0.740</td>
                <td>0.737</td>
                <td>0.757</td>
                <td>0.743</td>
                <td>0.805</td>
              </tr>
              <tr valign="top">
                <td>MLP<sup>f</sup></td>
                <td>0.506</td>
                <td>0.406</td>
                <td>0.507</td>
                <td>0.367</td>
                <td>0.516</td>
                <td colspan="2">0.845</td>
                <td>0.458</td>
                <td>0.45</td>
                <td>0.426</td>
                <td>0.692</td>
                <td colspan="2">0.792</td>
                <td>0.771</td>
                <td>0.794</td>
                <td>0.77</td>
                <td>0.840</td>
              </tr>
              <tr valign="top">
                <td>Logistic regression</td>
                <td>0.759</td>
                <td>0.767</td>
                <td>0.758</td>
                <td>0.756</td>
                <td>0.727</td>
                <td colspan="2">0.881</td>
                <td>0.742</td>
                <td>0.6</td>
                <td>0.63</td>
                <td>0.772</td>
                <td colspan="2">0.792</td>
                <td>0.771</td>
                <td>0.801</td>
                <td>0.773</td>
                <td>0.848</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Acc: accuracy.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>P: precision.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>R: recall.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>MLP: multilayer perceptron.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Classification results for the interplatform classification experiments for Facebook training data.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="100"/>
            <col width="0"/>
            <col width="70"/>
            <col width="70"/>
            <col width="100"/>
            <col width="70"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td colspan="6">Twitter</td>
                <td colspan="5">Instagram</td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>Acc<sup>a</sup></td>
                <td>P<sup>b</sup></td>
                <td>R<sup>c</sup></td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>AUROC<sup>d</sup></td>
                <td colspan="2">Acc</td>
                <td>P</td>
                <td>R</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>AUROC</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Random forest</td>
                <td>0.392</td>
                <td>0.221</td>
                <td>0.88</td>
                <td>0.354</td>
                <td>0.579</td>
                <td colspan="2">0.379</td>
                <td>0.328</td>
                <td>0.952</td>
                <td>0.488</td>
                <td>0.537</td>
              </tr>
              <tr valign="top">
                <td>SVM<sup>e</sup></td>
                <td>0.545</td>
                <td>0.253</td>
                <td>0.72</td>
                <td>0.373</td>
                <td>0.612</td>
                <td colspan="2">0.432</td>
                <td>0.337</td>
                <td>0.860</td>
                <td>0.483</td>
                <td>0.550</td>
              </tr>
              <tr valign="top">
                <td>MLP<sup>f</sup></td>
                <td>0.587</td>
                <td>0.240</td>
                <td>0.55</td>
                <td>0.334</td>
                <td>0.573</td>
                <td colspan="2">0.435</td>
                <td>0.332</td>
                <td>0.812</td>
                <td>0.471</td>
                <td>0.539</td>
              </tr>
              <tr valign="top">
                <td>Logistic regression</td>
                <td>0.628</td>
                <td>0.246</td>
                <td>0.47</td>
                <td>0.323</td>
                <td>0.567</td>
                <td colspan="2">0.472</td>
                <td>0.344</td>
                <td>0.775</td>
                <td>0.476</td>
                <td>0.555</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Acc: accuracy.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>P: precision.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>R: recall.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>MLP: multilayer perceptron.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Classification results for the interplatform classification experiments for Twitter training data.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="100"/>
            <col width="0"/>
            <col width="70"/>
            <col width="70"/>
            <col width="90"/>
            <col width="70"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td colspan="6">Facebook</td>
                <td colspan="5">Instagram</td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>Acc<sup>a</sup></td>
                <td>P<sup>b</sup></td>
                <td>R<sup>c</sup></td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>AUROC<sup>d</sup></td>
                <td colspan="2">Acc</td>
                <td>P</td>
                <td>R</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>AUROC</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Random forest</td>
                <td>0.531</td>
                <td>0.569</td>
                <td>0.378</td>
                <td>0.452</td>
                <td>0.536</td>
                <td colspan="2">0.628</td>
                <td>0.331</td>
                <td>0.207</td>
                <td>0.252</td>
                <td>0.512</td>
              </tr>
              <tr valign="top">
                <td>SVM<sup>e</sup></td>
                <td>0.514</td>
                <td>0.53</td>
                <td>0.537</td>
                <td>0.530</td>
                <td>0.513</td>
                <td colspan="2">0.563</td>
                <td>0.340</td>
                <td>0.42</td>
                <td>0.373</td>
                <td>0.523</td>
              </tr>
              <tr valign="top">
                <td>MLP<sup>f</sup></td>
                <td>0.533</td>
                <td>0.561</td>
                <td>0.440</td>
                <td>0.492</td>
                <td>0.536</td>
                <td colspan="2">0.557</td>
                <td>0.325</td>
                <td>0.395</td>
                <td>0.356</td>
                <td>0.512</td>
              </tr>
              <tr valign="top">
                <td>Logistic regression</td>
                <td>0.534</td>
                <td>0.552</td>
                <td>0.522</td>
                <td>0.535</td>
                <td>0.535</td>
                <td colspan="2">0.578</td>
                <td>0.362</td>
                <td>0.47</td>
                <td>0.408</td>
                <td>0.548</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Acc: accuracy.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>P: precision.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>R: recall.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table5fn6">
              <p><sup>f</sup>MLP: multilayer perceptron.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Classification results for the interplatform classification experiments for Instagram training data.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="100"/>
            <col width="0"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td colspan="6">Facebook</td>
                <td colspan="5">Twitter</td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>Acc<sup>a</sup></td>
                <td>P<sup>b</sup></td>
                <td>R<sup>c</sup></td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>AUROC<sup>d</sup></td>
                <td colspan="2">Acc</td>
                <td>P</td>
                <td>R</td>
                <td>
                  <italic>F</italic>
                  <sub>1</sub>
                </td>
                <td>AUROC</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Random forest</td>
                <td>0.51</td>
                <td>0.523</td>
                <td>0.612</td>
                <td>0.563</td>
                <td>0.507</td>
                <td colspan="2">0.751</td>
                <td>0.369</td>
                <td>0.42</td>
                <td>0.386</td>
                <td>0.624</td>
              </tr>
              <tr valign="top">
                <td>SVM<sup>e</sup></td>
                <td>0.524</td>
                <td>0.544</td>
                <td>0.51</td>
                <td>0.524</td>
                <td>0.525</td>
                <td colspan="2">0.691</td>
                <td>0.213</td>
                <td>0.25</td>
                <td>0.229</td>
                <td>0.521</td>
              </tr>
              <tr valign="top">
                <td>MLP<sup>f</sup></td>
                <td>0.554</td>
                <td>0.584</td>
                <td>0.48</td>
                <td>0.526</td>
                <td>0.557</td>
                <td colspan="2">0.683</td>
                <td>0.201</td>
                <td>0.23</td>
                <td>0.214</td>
                <td>0.51</td>
              </tr>
              <tr valign="top">
                <td>Logistic regression</td>
                <td>0.516</td>
                <td>0.524</td>
                <td>0.689</td>
                <td>0.595</td>
                <td>0.51</td>
                <td colspan="2">0.628</td>
                <td>0.256</td>
                <td>0.52</td>
                <td>0.342</td>
                <td>0.587</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>Acc: accuracy.</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup>P: precision.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup>R: recall.</p>
            </fn>
            <fn id="table6fn4">
              <p><sup>d</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table6fn5">
              <p><sup>e</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table6fn6">
              <p><sup>f</sup>MLP: multilayer perceptron.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Receiver operating characteristic (ROC) curves for the classification experiments given the best logistic regression model. (A), (B), and (C) are curves for the Facebook, Twitter, and Instagram intraplatform results, respectively, from <xref ref-type="table" rid="table3">Table 3</xref>. (D) and (E) are the ROC curves for the interplatform experiments from <xref ref-type="table" rid="table4">Table 4</xref>, where Facebook was used as the training data.</p>
          </caption>
          <graphic xlink:href="mental_v9i12e39747_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Feature Importance Analysis</title>
        <p>We hypothesized that the decrease in performance from intraplatform experiments to interplatform experiments, as presented previously, was driven by differences in feature importance learned by models when trained on data from different social media platforms (even when they shared the same feature set). By extracting the list of SHAP features from the models per the method described previously, we found support for this hypothesis. Specifically, we observed little overlap between them across platforms among the top 25 features for each model and platform (when holding the model constant). On average, there were only 4.66 overlapping features for the same logistic regression classification model across platforms (the best-performing model based on the previous discussions). In addition, we found that the lists of feature importance for each of the platforms, based on the logistic regression model, had very weak rank correlation pairwise. Fully elaborating on the statistical results for the Kendall rank correlation coefficient, we found very weak rank correlations between the ranked lists of feature importance for Facebook and Twitter (τ<sub>b</sub>=0.081; <italic>P</italic>=.003), Facebook and Instagram (τ<sub>b</sub>=0.041; <italic>P</italic>=.01), and Twitter and Instagram (τ<sub>b</sub>=0.055; <italic>P</italic>=.05). We report the average SHAP values and logistic regression coefficients of the top 10 features based on their SHAP values, along with their average value in the SSD class and the control class, in <xref ref-type="table" rid="table7">Table 7</xref>.</p>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Top 10 features for the logistic regression (LR) model for each of the platforms (Linguistic Inquiry and Word Count features are italicized) based on their Shapley Additive Explanations (SHAP) values.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="180"/>
            <col width="200"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="200"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Platform and feature acronym</td>
                <td colspan="2">Feature description</td>
                <td colspan="2">SHAP value</td>
                <td colspan="2">LR coefficient</td>
                <td colspan="2">SSD<sup>a</sup> group average (SD)</td>
                <td>Control group average (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="11">
                  <bold>Facebook</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Avg_post_readability</td>
                <td>Average post readability, as measured using the SMOG<sup>b</sup> index</td>
                <td colspan="2">0.761</td>
                <td colspan="2">−0.268</td>
                <td colspan="2">5.6341 (2.74)</td>
                <td colspan="2">6.8048 (1.92)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Quant</italic>
                </td>
                <td>Ratio of words within the “quantifiers” category</td>
                <td colspan="2">0.4195</td>
                <td colspan="2">−0.189</td>
                <td colspan="2">0.0012 (0.0012)</td>
                <td colspan="2">0.0016 (0.0012)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Negemo</italic>
                </td>
                <td>Ratio of words within the “negative emotions” category</td>
                <td colspan="2">0.0953</td>
                <td colspan="2">0.244</td>
                <td colspan="2">0.0043 (0.0035)</td>
                <td colspan="2">0.0031 (0.0022)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Money</italic>
                </td>
                <td>Ratio of words within the “money” category</td>
                <td colspan="2">0.0739</td>
                <td colspan="2">−0.216</td>
                <td colspan="2">0.0007 (0.001)</td>
                <td colspan="2">0.0011 (0.002)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Swear</italic>
                </td>
                <td>Ratio of words within the “swear” category</td>
                <td colspan="2">0.0628</td>
                <td colspan="2">0.236</td>
                <td colspan="2">0.0017 (0.0025)</td>
                <td colspan="2">0.0007 (0.001)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio_octile8</td>
                <td>Ratio of activities from 9 PM to midnight</td>
                <td colspan="2">0.0443</td>
                <td colspan="2">0.077</td>
                <td colspan="2">0.1443 (0.149)</td>
                <td colspan="2">0.1241 (0.158)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio_octile7</td>
                <td>Ratio of activities from 6 PM to 9 PM</td>
                <td colspan="2">0.0409</td>
                <td colspan="2">0.177</td>
                <td colspan="2">0.1561 (0.1745)</td>
                <td colspan="2">0.1054 (0.125)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Anger</italic>
                </td>
                <td>Ratio of words within the “anger” category</td>
                <td colspan="2">0.0095</td>
                <td colspan="2">0.191</td>
                <td colspan="2">0.0018 (0.002)</td>
                <td colspan="2">0.0009 (0.001)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Dream</td>
                <td>Ratio of “dream” within the overall bag of words</td>
                <td colspan="2">0.0077</td>
                <td colspan="2">0.224</td>
                <td colspan="2">0.2028 (0.468)</td>
                <td colspan="2">0.0746 (0.24)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Fun</td>
                <td>Ratio of “fun” within the overall bag of words</td>
                <td colspan="2">0.0043</td>
                <td colspan="2">−0.209</td>
                <td colspan="2">0.5722 (1.19)</td>
                <td colspan="2">1.1315 (1.76)</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Twitter</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Conj</italic>
                </td>
                <td>Ratio of words within the “conjunctions” category</td>
                <td colspan="2">0.2319</td>
                <td colspan="2">−0.063</td>
                <td colspan="2">0.0001 (0.0002)</td>
                <td colspan="2">0.0003 (0.0004)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Adj</italic>
                </td>
                <td>Ratio of words within the “adjectives” category</td>
                <td colspan="2">0.1825</td>
                <td colspan="2">−0.05</td>
                <td colspan="2">0.0057 (0.004)</td>
                <td colspan="2">0.0080 (0.005)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Avg_post_negativity</td>
                <td>Average post negativity, as calculated using the VADER<sup>c</sup> library</td>
                <td colspan="2">0.1509</td>
                <td colspan="2">0.082</td>
                <td colspan="2">0.071 (0.042)</td>
                <td colspan="2">0.0519 (0.036)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Male</italic>
                </td>
                <td>Ratio of words within the “male” category</td>
                <td colspan="2">0.1355</td>
                <td colspan="2">0.039</td>
                <td colspan="2">0.0011 (0.0013)</td>
                <td colspan="2">0.0007 (0.001)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ratio_octile8</td>
                <td>Ratio of activities from 9 PM to midnight</td>
                <td colspan="2">0.1265</td>
                <td colspan="2">0.045</td>
                <td colspan="2">0.0231 (0.356)</td>
                <td colspan="2">0.1227 (0.188)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Ingest</italic>
                </td>
                <td>Ratio of words within the “ingest” category</td>
                <td colspan="2">0.0627</td>
                <td colspan="2">−0.056</td>
                <td colspan="2">0.0003 (0.0007)</td>
                <td colspan="2">0.0014 (0.0018)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Insight</italic>
                </td>
                <td>Ratio of words within the “insight” category</td>
                <td colspan="2">0.0516</td>
                <td colspan="2">0.053</td>
                <td colspan="2">0.0044 (0.004)</td>
                <td colspan="2">0.0035 (0.003)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Power</italic>
                </td>
                <td>Ratio of words within the “power” category</td>
                <td colspan="2">0.0308</td>
                <td colspan="2">−0.058</td>
                <td colspan="2">0.0024 (0.0026)</td>
                <td colspan="2">0.0042 (0.0036)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>We</italic>
                </td>
                <td>Ratio of words within the “we” category</td>
                <td colspan="2">0.0196</td>
                <td colspan="2">−0.056</td>
                <td colspan="2">0.0001 (0.0002)</td>
                <td colspan="2">0.0002 (0.0004)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Prep</italic>
                </td>
                <td>Ratio of words within the “prepositions” category</td>
                <td colspan="2">0.0117</td>
                <td colspan="2">0.063</td>
                <td colspan="2">0.0028 (0.0026)</td>
                <td colspan="2">0.0017 (0.0017)</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Instagram</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Avg_post_readability</td>
                <td>Average post readability, as measured using the SMOG index</td>
                <td colspan="2">0.761</td>
                <td colspan="2">−0.203</td>
                <td colspan="2">5.1018 (1.15)</td>
                <td colspan="2">6.2564 (1.638)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Space</italic>
                </td>
                <td>Ratio of words within the “space” category</td>
                <td colspan="2">0.733</td>
                <td colspan="2">−0.147</td>
                <td colspan="2">0.0031 (0.0025)</td>
                <td colspan="2">0.0042 (0.0025)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Affiliation</italic>
                </td>
                <td>Ratio of words within the “affiliation” category</td>
                <td colspan="2">0.6839</td>
                <td colspan="2">−0.181</td>
                <td colspan="2">0.0032 (0.0027)</td>
                <td colspan="2">0.0056 (0.0034)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Friend</italic>
                </td>
                <td>Ratio of words within the “friend” category</td>
                <td colspan="2">0.5336</td>
                <td colspan="2">−0.159</td>
                <td colspan="2">0.0009 (0.0027)</td>
                <td colspan="2">0.0018 (0.0034)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Female</italic>
                </td>
                <td>Ratio of words within the “female” category</td>
                <td colspan="2">0.4576</td>
                <td colspan="2">−0.168</td>
                <td colspan="2">0.0008 (0.001)</td>
                <td colspan="2">0.0019 (0.0023)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Sad</italic>
                </td>
                <td>Ratio of words within the “sad” category</td>
                <td colspan="2">0.4554</td>
                <td colspan="2">0.113</td>
                <td colspan="2">0.0011 (0.0008)</td>
                <td colspan="2">0.0007 (0.0012)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Quant</italic>
                </td>
                <td>Ratio of words within the “quantifiers” category</td>
                <td colspan="2">0.4195</td>
                <td colspan="2">−0.118</td>
                <td colspan="2">0.0012 (0.0013)</td>
                <td colspan="2">0.0019 (0.0016)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Away</td>
                <td>Ratio of “away” within the overall bag of words</td>
                <td colspan="2">0.4064</td>
                <td colspan="2">−0.105</td>
                <td colspan="2">0.0768 (0.276)</td>
                <td colspan="2">0.2505 (0.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>Assent</italic>
                </td>
                <td>Ratio of words within the “assent” category</td>
                <td colspan="2">0.3913</td>
                <td colspan="2">−0.102</td>
                <td colspan="2">0.0008 (0.0012)</td>
                <td colspan="2">0.0013 (0.0014)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Next</td>
                <td>Ratio of “next” within the overall bag of words</td>
                <td colspan="2">0.3854</td>
                <td colspan="2">−0.12</td>
                <td colspan="2">0.0957 (0.267)</td>
                <td colspan="2">0.6466 (1.236)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>SSD: schizophrenia spectrum disorder.</p>
            </fn>
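            <fn id="table7fn2">
              <p><sup>b</sup>SMOG: Simple Measure of Gobbledygook.</p>
            </fn>
            <fn id="table7fn3">
              <p><sup>c</sup>VADER: Valence Aware Dictionary and Sentiment Reasoner.</p>
            </fn>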
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Attributing Divergent Construct Validity of Models to Divergent Identities on the Web</title>
        <p>What could explain the observed differences in construct validities of the intraplatform models? Early in this paper, we posited that these differences might stem from people’s identities being fragmented across different platforms. To establish that these divergent identities are indeed the drivers behind differential cross-platform model construct validities and, by extension, performance, we adopted a strategy to measure the differences within the extracted feature space between the investigated platforms for a given participant. As social media data for participants on all platforms are encoded via feature vectors in this study, we calculated the pairwise similarity between platform-specific data using cosine similarity [<xref ref-type="bibr" rid="ref36">36</xref>]. More specifically, we calculated the average cosine similarity within participants between platforms and compared it with the average cosine similarity between participants within platforms for participants with SSD with data on all 3 platforms. Given that, even within the same social media platform, different people can have unique modes of expressing their identities, we used the latter as a baseline for assessing whether fragments of identities representing an individual across platforms diverge more or less than identities diverge between individuals.</p>
        <p>We found that the average between-platform, within-participant cosine similarity was 0.3093 for Facebook-Twitter, 0.2304 for Facebook-Instagram, and 0.3905 for Twitter-Instagram. These values were either lower than or similar to the average within-platform, between-participant cosine similarity for the investigated platforms: 0.5072 for Facebook, 0.5427 for Twitter, and 0.373 for Instagram. The same trend holds even when calculating the averages using data from both participants with SSD and healthy controls with data from all 3 platforms.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our study aimed to measure the ability (or inability) of mental health classifiers to generalize across platforms and surface evidence of fragmented identities on social media among patients with SSD. Overall, we found that, across the board, models trained on data from social media platforms have poor generalizability when evaluated on data from other social media platforms even when holding the feature set constant across training and testing data. This trend holds true even in the 2 robustness tests, where the same participants and data set size were used in the training and testing data (as described in the Methods section). This trend is also true even when the training data come from a platform with high data availability and the testing data come from a platform with low data availability. For instance, the best <italic>F</italic><sub>1</sub>-score of the intraplatform models for Twitter (0.63) was 0.257 (69%) higher compared with the best <italic>F</italic><sub>1</sub>-score of the interplatform models for Twitter, where the training data came from Facebook (0.373).</p>
        <p>Next, we discuss the findings regarding feature importance in more detail. First, looking at the theoretical validity of the top 10 features per platform and interpretation of the sign of the features’ logistic regression coefficient, we found alignment with previous literature and evidence of clinical meaningfulness [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. For instance, given the positive coefficient from the trained logistic regression model presented in <xref ref-type="table" rid="table7">Table 7</xref>, higher levels of use of lexicon indicative of negative emotions are highly predictive of SSD for Facebook (see the example post in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref> highlighting words such as “fear,” “fail,” and “hurts”). This confirms literature noting that a reduced ability to feel or express pleasure (anhedonia) is common in patients with SSD [<xref ref-type="bibr" rid="ref37">37</xref>]. Similarly, previous research has found anger-related terms commonly appearing in social media posts before the onset of early psychosis as well as preceding a psychiatric hospitalization [<xref ref-type="bibr" rid="ref38">38</xref>]. This may explain why higher levels of use of lexicon indicative of the Linguistic Inquiry and Word Count category <italic>Anger</italic> are also highly predictive of SSD for Facebook (see the example post in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref> containing <italic>Anger</italic> words such as “shit” and “fucking”). Finally, words and phrases such as those in the Linguistic Inquiry and Word Count <italic>Sad</italic> category (eg, “useless,” “sorry,” and “sob”) point to typical negative symptoms of SSD [<xref ref-type="bibr" rid="ref39">39</xref>]. They can be indicative of a decreased sense of purpose and a seeming lack of interest in the world [<xref ref-type="bibr" rid="ref39">39</xref>]. Models trained on Instagram successfully picked up such cues from the posts, where higher use of such vocabulary was indicative of an impending psychiatric hospitalization because of SSD.</p>
        <p>That said, each model corresponding to each platform seemed to pick up contrasting signals from its respective training data, which is why we note the low overlap in the aforementioned top SHAP features. Among the few that overlap in the top 10 features reported previously, we found “avg_post_readability” to be picked up as a highly predictive feature by both Facebook and Instagram models, whereas “ratio_octile8” was selected by both Facebook and Twitter models. In our case, “avg_post_readability” is calculated using the Simple Measure of Gobbledygook index, which approximates the years of education needed to fully comprehend a piece of written text. The negative logistic regression coefficient and the averages of the SSD and control groups for this feature suggest that texts written by patients with SSD are simpler in nature, which is indicative of language dysfunction. This is a known negative symptom of schizophrenia and related psychotic disorders, as observed in prior work [<xref ref-type="bibr" rid="ref40">40</xref>]. In addition, higher levels of late-night activity such as web or social media use, captured in the “ratio_octile8” feature, have been known to be associated with deteriorated mental health [<xref ref-type="bibr" rid="ref41">41</xref>]. Finally, we found significant divergence in the distribution of feature importance between the platforms, as indicated by the low pairwise Kendall τ (&#60;0.1) for the platforms’ feature importance rankings. These qualitative and quantitative results broadly imply that the models were being trained on considerably different data sources with differing content and contexts of use, which likely contributed to poor cross-platform model generalization.</p>
        <p>At the crux of these differences, we found that the models had inherently different construct validity across platforms. Data on each platform reflect only a segment of an individual’s identity—a segment that may be absent on another platform. The fragmentation of one’s identity on social media can be most clearly seen among participants with data on all 3 platforms. In the analysis presented at the end of the Results section, we found low average pairwise cosine similarities within participants between platforms, especially when compared with cosine similarities of different participants within the same platform. This indicates that, even within the same feature space for the same participant, social media data between platforms are likely to diverge into multiple distinct directions mapping to these fragments of identities. This divergence is at least equal to, if not greater than, the divergence in identity presentation between different individuals within the same social media platform. Therefore, when models trained on data from one platform learn this specific fragment of identity, they are less effective on testing data that capture a different identity.</p>
        <boxed-text id="box3" position="float">
          <title>Example (paraphrased and deidentified) posts representative of example top features to distinguish between schizophrenia spectrum disorder and control classes. Words indicative of the features are italicized.</title>
          <p>
            <bold>NegEmo</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>I <italic>fear</italic> to try and <italic>fail</italic>, because i don’t want to be part of the STATISTIC of people that <italic>failed</italic>. It <italic>hurts</italic> when the opportunity passes by though.</p>
            </list-item>
          </list>
          <p>
            <bold>Swear</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Omfg the Damn <italic>mf</italic> #struggle to stay the <italic>fking</italic> sleep I’m like <italic>wtf</italic> this isn’t fair I hate my Damn neck hurting like this <italic>shit</italic> isn’t cool this pain waking me up every Damn hr</p>
            </list-item>
          </list>
          <p>
            <bold>Sad</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Im a <italic>useless sorry sob</italic></p>
            </list-item>
          </list>
          <p>
            <bold>Anger</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Yo stay <italic>tf</italic> out my room unless we <italic>fucking</italic> cause I’m tired too tired for this <italic>shit</italic> <inline-graphic xlink:href="mental_v9i12e39747_fig10.png" xlink:type="simple" mimetype="image"/> and all my <italic>shit</italic> better be where i left it</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Our findings provide replicative validity to several threads in previous research. Specifically, we found that the performance of models trained on social media data with clinically verified labels (ie, SSD or control) is consistent with similar models presented in previous research, including those trained on similar patient populations and clinical sites [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Furthermore, linguistic differences reflecting serious mental health conditions between social media platforms found in our work have also been elucidated in previous work. For instance, Guntuku et al [<xref ref-type="bibr" rid="ref42">42</xref>] found that there is little overlap between words indicative of stress on Twitter and Facebook. In addition, our findings regarding the low performance of models for interplatform tasks compared with intraplatform tasks follow a similar vein to those of the study by Ernala et al [<xref ref-type="bibr" rid="ref8">8</xref>]. In their study, they found that, despite the overwhelming advantage in data availability, models trained on social media data with self-reported labels significantly underperformed models trained on social media data with clinically verified labels when evaluated on clinical testing data [<xref ref-type="bibr" rid="ref8">8</xref>]. Similar to our experiments, such a difference in performance in the study by Ernala et al [<xref ref-type="bibr" rid="ref8">8</xref>] was also noted to be caused by a mismatch in important features learned by the different models to differentiate between language and activity patterns deployed by patients with SSD and healthy controls. Overall, our analysis combined with previous results suggests that construct validities of predictive models trained on data from different social media platforms are dissimilar, reinforcing the need for continued exploration of novel social media–based early identification strategies with a special emphasis on uniting distinct fragments of identities for accurate identification and intervention.</p>
      </sec>
      <sec>
        <title>Clinical Implications</title>
        <p>Our findings have important implications for mental health research and practice. Hospitalization prediction for psychiatric illnesses by harnessing digital trace data has been of significant interest in recent years. These previous studies have explored the utility of smartphone sensor data (ie, geolocation, physical activity, phone use, and speech), wearables, and social media activity to predict symptom fluctuations as well as understand the diagnostic process and hospitalization identification [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref46">46</xref>]. Our work extends this body of research by critically examining how machine learning efforts that harness data from single sources may not be readily applicable to support hospitalization prediction in contexts where the same source of data is not present. For these models to be usable in the real world, we advocate for a comprehensive approach in which clinicians look to patterns gleaned through the integration of different data sources while augmenting their decision-making with objective measures derived from digital trace data. Social media data are also increasingly becoming a part of consultations [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>]. Therefore, we suggest that clinicians consider both acknowledging and incorporating collateral information spanning multiple platforms into the way they monitor symptomatic exacerbation in their patients and modify treatment to prevent further hospitalizations.</p>
        <p>Finally, digital interventions that are touted to be powered by social media data should consider the significant aspect of fragmented web-based identities of patients [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>]. To intervene at the right time, at the right place, and for the right person, a comprehensive approach to understanding a patient’s context for hospitalization prediction would be beneficial. However, we recognize that, in a domain as sensitive as mental health, combining data sources may further complicate the privacy and ethical risks to those who contribute their data—research has shown that information integration can enable the discovery of otherwise latent attributes, some of which may present grave feelings of discomfort and violation in individuals [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. Therefore, we urge caution and call for new standards to protect the confidentiality and rights of this sensitive population and ensure that the enabled technologies are used in the service of positive outcomes for the patients.</p>
      </sec>
      <sec>
        <title>Limitations and Future Work</title>
        <p>Our work has some limitations that could be addressed in future research. First, despite the use of data augmentation techniques to rebalance the ratio between SSD data and control data for each data set and make the data set sizes of the 3 examined platforms (ie, Instagram, Twitter, and Facebook) comparable with each other, we acknowledge that a limited quantity of available data may have affected the observed classification performance. Although it is widely recognized that patient social media data are challenging to collect, as was the case in this study, future research may consider the potential of creating large benchmarked data sets that may support better reproducible research in this field [<xref ref-type="bibr" rid="ref53">53</xref>]. Second, we acknowledge the demographic dissimilarity between participants with SSD and healthy controls, which may be a confounding factor in our study design. Furthermore, our methods did not examine or extract any features concerning video data, which are available on Facebook and especially Instagram. Given that youths nowadays are increasingly expressing themselves on social media via videos (especially on video-centric platforms such as TikTok), future research should aim to fill these gaps so that we can ensure the completeness of one’s mental health records expressed on social media and other forms of networked communication. Along these lines, future research may also consider data from additional novel social media platforms that are increasingly being used by youths for their social goals, such as Snapchat and TikTok. Finally, it would be worthwhile to examine additional clinical questions such as suicidal risk to explore the extent to which identity fragmentation across social media platforms may affect the quality of inferences made from these data.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we showed that it is challenging to build effective models for predicting future psychiatric hospitalizations of patients with SSD on new social media data from platforms previously unseen in the models’ training data. Specifically, we demonstrated that models built on one platform’s data do not generalize to another as each platform consistently reflects different segments of participants’ identities. This fragmentation of identity is empirically backed up by both significant differences in the construct validity of intraplatform classifiers and divergent feature vectors within participants between the 3 investigated social media platforms. To ensure the effective incorporation of digital technology into early psychosis intervention, especially in the prevention of relapse hospitalizations, further research must explore precisely how symptoms of mental illness manifest on the web through changing patterns of language and activity on various platforms as well as how comprehensive, ethical, and effective treatment and engagement strategies should be devised that function seamlessly across patients’ fragmented web-based identities.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Additional information on the feature selection process and robustness checks.</p>
        <media xlink:href="mental_v9i12e39747_app1.docx" xlink:title="DOCX File, 9 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">SHAP</term>
          <def>
            <p>Shapley Additive Explanations</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">SSD</term>
          <def>
            <p>schizophrenia spectrum disorder</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was partly funded by National Institute of Mental Health grant R01MH117172 (principal investigator: MDC; co–principal investigators: MLB and JMK). The research team acknowledges the assistance of Anna Van Meter and Asra Ali in the early phases of patient data collection. The authors also thank members of the Social Dynamics and Wellbeing Lab at Georgia Tech for their valuable feedback during the various phases of the study.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>MLB is a consultant for HearMe and Northshore Therapeutics. JMK is a consultant to or receives honoraria from Alkermes, Allergan, Boehringer-Ingelheim, Cerevel, Dainippon Sumitomo, H. Lundbeck, Indivior, Intracellular Therapies, Janssen Pharmaceutical, Johnson &#38; Johnson, LB Pharmaceuticals, Merck, Minerva, Neurocrine, Newron, Novartis, Otsuka, Roche, Saladax, Sunovion, Teva, HLS, and HealthRhythms and is a member of the advisory boards of Cerevel, Click Therapeutics, Teva, Newron, Sumitomo, Otsuka, Lundbeck, and Novartis. He has received grant support from Otsuka, Lundbeck, Sunovion, and Janssen and is a shareholder of Vanguard Research Group; LB Pharmaceuticals, Inc; and North Shore Therapeutics. The other authors have no conflicts of interest to declare.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolthaus</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Dingemans</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Schene</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Linszen</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Wiersma</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Van Den Bosch</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cahn</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hijman</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Caregiver burden in recent-onset schizophrenia and spectrum disorders: the influence of symptoms and personality traits</article-title>
          <source>J Nerv Ment Dis</source>
          <year>2002</year>
          <month>04</month>
          <volume>190</volume>
          <issue>4</issue>
          <fpage>241</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1097/00005053-200204000-00005</pub-id>
          <pub-id pub-id-type="medline">11960085</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birchwood</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Macmillan</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Early intervention in schizophrenia</article-title>
          <source>Aust N Z J Psychiatry</source>
          <year>1993</year>
          <month>09</month>
          <volume>27</volume>
          <issue>3</issue>
          <fpage>374</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.3109/00048679309075792</pub-id>
          <pub-id pub-id-type="medline">8250779</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lieberman</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Fenton</surname>
              <given-names>WS</given-names>
            </name>
          </person-group>
          <article-title>Delayed detection of psychosis: causes, consequences, and effect on public health</article-title>
          <source>Am J Psychiatry</source>
          <year>2000</year>
          <month>11</month>
          <volume>157</volume>
          <issue>11</issue>
          <fpage>1727</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1176/appi.ajp.157.11.1727</pub-id>
          <pub-id pub-id-type="medline">11058464</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Rizvi</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Confino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Correll</surname>
              <given-names>CU</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Role of social media and the internet in pathways to care for adolescents and young adults with psychotic disorders and non-psychotic mood disorders</article-title>
          <source>Early Interv Psychiatry</source>
          <year>2017</year>
          <month>08</month>
          <day>23</day>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>290</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25808317"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/eip.12237</pub-id>
          <pub-id pub-id-type="medline">25808317</pub-id>
          <pub-id pub-id-type="pmcid">PMC4580496</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schrimsher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Peeples</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Buckley</surname>
              <given-names>PF</given-names>
            </name>
          </person-group>
          <article-title>How connected are people with schizophrenia? Cell phone, computer, email, and social media use</article-title>
          <source>Psychiatry Res</source>
          <year>2015</year>
          <month>02</month>
          <day>28</day>
          <volume>225</volume>
          <issue>3</issue>
          <fpage>458</fpage>
          <lpage>63</lpage>
          <pub-id pub-id-type="doi">10.1016/j.psychres.2014.11.067</pub-id>
          <pub-id pub-id-type="medline">25563669</pub-id>
          <pub-id pub-id-type="pii">S0165-1781(14)00960-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Ernala</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Rizvi</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Arenare</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Van Meter</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Detecting relapse in youth with psychotic disorders utilizing patient-generated and patient-contributed digital data from Facebook</article-title>
          <source>NPJ Schizophr</source>
          <year>2019</year>
          <month>10</month>
          <day>07</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>17</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41537-019-0085-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41537-019-0085-9</pub-id>
          <pub-id pub-id-type="medline">31591400</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41537-019-0085-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC6779748</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hollingshead</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Quantifying the language of schizophrenia in social media</article-title>
          <source>Proceedings of the 2nd Workshop on Computational Linguistics and Clinical Psychology: From Linguistic Signal to Clinical Reality</source>
          <year>2015</year>
          <conf-name>2nd Workshop on Computational Linguistics and Clinical Psychology: From Linguistic Signal to Clinical Reality</conf-name>
          <conf-date>Jun 5, 2015</conf-date>
          <conf-loc>Denver, Colorado</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/w15-1202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ernala</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Candan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rizvi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sterling</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Methodological gaps in predicting mental health states from social media: triangulating diagnostic signals</article-title>
          <source>Proceedings of the 2019 CHI Conference on Human Factors in Computing Systems</source>
          <year>2019</year>
          <conf-name>CHI '19: CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>May 4-9, 2019</conf-date>
          <conf-loc>Glasgow, Scotland, UK</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3290605.3300364</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rekhi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ang</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Clinical determinants of social media use in individuals with schizophrenia</article-title>
          <source>PLoS One</source>
          <year>2019</year>
          <month>11</month>
          <day>20</day>
          <volume>14</volume>
          <issue>11</issue>
          <fpage>e0225370</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0225370"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0225370</pub-id>
          <pub-id pub-id-type="medline">31747434</pub-id>
          <pub-id pub-id-type="pii">PONE-D-19-06610</pub-id>
          <pub-id pub-id-type="pmcid">PMC6867641</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zomick</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Levitan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Serper</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Linguistic analysis of schizophrenia in Reddit posts</article-title>
          <source>Proceedings of the Sixth Workshop on Computational Linguistics and Clinical Psychology</source>
          <year>2019</year>
          <conf-name>Sixth Workshop on Computational Linguistics and Clinical Psychology</conf-name>
          <conf-date>Jun, 2019</conf-date>
          <conf-loc>Minneapolis, Minnesota</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w19-3009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Ernala</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Rizvi</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>A collaborative approach to identifying social media markers of schizophrenia by employing machine learning and clinical appraisals</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>08</month>
          <day>14</day>
          <volume>19</volume>
          <issue>8</issue>
          <fpage>e289</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2017/8/e289/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.7956</pub-id>
          <pub-id pub-id-type="medline">28807891</pub-id>
          <pub-id pub-id-type="pii">v19i8e289</pub-id>
          <pub-id pub-id-type="pmcid">PMC5575421</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ernala</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Rizvi</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Linguistic markers indicating therapeutic outcomes of social media disclosures of schizophrenia</article-title>
          <source>Proc ACM Human Comput Interact</source>
          <year>2017</year>
          <month>12</month>
          <day>06</day>
          <volume>1</volume>
          <issue>CSCW</issue>
          <fpage>1</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.1145/3134678</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Auxier</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Social media use in 2021</article-title>
          <source>Pew Research Center</source>
          <year>2021</year>
          <month>04</month>
          <day>07</day>
          <access-date>2022-12-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2021/04/07/social-media-use-in-2021/">https://www.pewresearch.org/internet/2021/04/07/social-media-use-in-2021/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hänsel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>IW</given-names>
            </name>
            <name name-style="western">
              <surname>Sobolev</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Muscat</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yum-Chan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Utilizing Instagram data to identify usage patterns associated with schizophrenia spectrum disorders</article-title>
          <source>Front Psychiatry</source>
          <year>2021</year>
          <month>8</month>
          <day>16</day>
          <volume>12</volume>
          <fpage>691327</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34483987"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyt.2021.691327</pub-id>
          <pub-id pub-id-type="medline">34483987</pub-id>
          <pub-id pub-id-type="pmcid">PMC8415353</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Norel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Van Meter</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Arenare</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Eyigoz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Agurto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Germano</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Cecchi</surname>
              <given-names>GA</given-names>
            </name>
          </person-group>
          <article-title>Identifying signals associated with psychiatric illness utilizing language and images posted to Facebook</article-title>
          <source>NPJ Schizophr</source>
          <year>2020</year>
          <month>12</month>
          <day>03</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>38</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41537-020-00125-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41537-020-00125-0</pub-id>
          <pub-id pub-id-type="medline">33273468</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41537-020-00125-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC7713057</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chancellor</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Methods in predictive techniques for mental health status on social media: a critical review</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <volume>3</volume>
          <fpage>43</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-0233-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-0233-7</pub-id>
          <pub-id pub-id-type="medline">32219184</pub-id>
          <pub-id pub-id-type="pii">233</pub-id>
          <pub-id pub-id-type="pmcid">PMC7093465</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kircaburun</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Griffiths</surname>
              <given-names>MD</given-names>
            </name>
          </person-group>
          <article-title>Instagram addiction and the big five of personality: the mediating role of self-liking</article-title>
          <source>J Behav Addict</source>
          <year>2018</year>
          <month>03</month>
          <day>01</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>158</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29461086"/>
          </comment>
          <pub-id pub-id-type="doi">10.1556/2006.7.2018.15</pub-id>
          <pub-id pub-id-type="medline">29461086</pub-id>
          <pub-id pub-id-type="pmcid">PMC6035031</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bayer</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Ellison</surname>
              <given-names>NB</given-names>
            </name>
            <name name-style="western">
              <surname>Schoenebeck</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Falk</surname>
              <given-names>EB</given-names>
            </name>
          </person-group>
          <article-title>Sharing the small moments: ephemeral social interaction on Snapchat</article-title>
          <source>Inform Commun Soc</source>
          <year>2015</year>
          <month>09</month>
          <day>18</day>
          <volume>19</volume>
          <issue>7</issue>
          <fpage>956</fpage>
          <lpage>77</lpage>
          <pub-id pub-id-type="doi">10.1080/1369118x.2015.1084349</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Purwaningtyas</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Alicya</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>The fragmented self: having multiple accounts in Instagram usage practice among Indonesian youth</article-title>
          <source>J Media dan Komunikasi Indonesia</source>
          <year>2020</year>
          <month>09</month>
          <day>24</day>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>171</fpage>
          <pub-id pub-id-type="doi">10.22146/jmki.58459</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gündüz</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>The effect of social media on identity construction</article-title>
          <source>Mediterranean J Social Sci</source>
          <year>2017</year>
          <volume>8</volume>
          <issue>5</issue>
          <fpage>85</fpage>
          <lpage>92</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>First</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Spitzer</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Gibbon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Structured Clinical Interview for DSM-IV-TR Axis I Disorders, Research Version</source>
          <year>2002</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Biometrics Research, New York State Psychiatric Institute</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mattia</surname>
              <given-names>JI</given-names>
            </name>
          </person-group>
          <article-title>A self-report scale to help make psychiatric diagnoses: the psychiatric diagnostic screening questionnaire</article-title>
          <source>Arch Gen Psychiatry</source>
          <year>2001</year>
          <month>08</month>
          <day>01</day>
          <volume>58</volume>
          <issue>8</issue>
          <fpage>787</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1001/archpsyc.58.8.787</pub-id>
          <pub-id pub-id-type="medline">11483146</pub-id>
          <pub-id pub-id-type="pii">yoa20167</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ernala</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Kashiparekh</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Bolous</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A social media study on mental health status transitions surrounding psychiatric hospitalizations</article-title>
          <source>Proc ACM Hum Comput Interact</source>
          <year>2021</year>
          <month>04</month>
          <day>13</day>
          <volume>5</volume>
          <issue>CSCW1</issue>
          <fpage>1</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36267476"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3449229</pub-id>
          <pub-id pub-id-type="medline">36267476</pub-id>
          <pub-id pub-id-type="pii">155</pub-id>
          <pub-id pub-id-type="pmcid">PMC9581345</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aizawa</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>An information-theoretic perspective of tf–idf measures</article-title>
          <source>Inform Process Manag</source>
          <year>2003</year>
          <month>1</month>
          <volume>39</volume>
          <issue>1</issue>
          <fpage>45</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1016/s0306-4573(02)00021-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tausczik</surname>
              <given-names>YR</given-names>
            </name>
            <name name-style="western">
              <surname>Pennebaker</surname>
              <given-names>JW</given-names>
            </name>
          </person-group>
          <article-title>The psychological meaning of words: LIWC and computerized text analysis methods</article-title>
          <source>J Language Social Psychol</source>
          <year>2009</year>
          <month>12</month>
          <day>08</day>
          <volume>29</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.1177/0261927x09351676</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McLaughlin</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>SMOG grading - a new readability formula</article-title>
          <source>J Reading</source>
          <year>1969</year>
          <volume>12</volume>
          <issue>8</issue>
          <fpage>639</fpage>
          <lpage>46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://psycnet.apa.org/record/1969-14260-001"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hutto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>VADER: a parsimonious rule-based model for sentiment analysis of social media text</article-title>
          <source>Proceedings of the International AAAI Conference on Web and Social Media</source>
          <year>2014</year>
          <conf-name>International AAAI Conference on Web and Social Media</conf-name>
          <conf-date>Jun 1–4, 2014</conf-date>
          <conf-loc>Ann Arbor, Michigan, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garimella</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Alfayad</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weber</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Social media image analysis for public health</article-title>
          <source>Proceedings of the 2016 CHI Conference on Human Factors in Computing Systems</source>
          <year>2016</year>
          <conf-name>CHI'16: CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>May 7-12, 2016</conf-date>
          <conf-loc>San Jose, California, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2858036.2858234</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Kegelmeyer</surname>
              <given-names>WP</given-names>
            </name>
          </person-group>
          <article-title>SMOTE: synthetic minority over-sampling technique</article-title>
          <source>J Artif Intell Res</source>
          <year>2002</year>
          <month>06</month>
          <day>01</day>
          <volume>16</volume>
          <fpage>321</fpage>
          <lpage>57</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guyon</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Elisseeff</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>An introduction to variable and feature selection</article-title>
          <source>J Mach Learn Res</source>
          <year>2003</year>
          <volume>3</volume>
          <fpage>1157</fpage>
          <lpage>82</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <issue>85</issue>
          <fpage>2825</fpage>
          <lpage>30</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>The Elements of Statistical Learning: Data Mining, Inference, and Prediction</source>
          <year>2009</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Powers</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Evaluation: from precision, recall and F-measure to ROC, informedness, markedness and correlation</article-title>
          <source>Int J Mach Learn Technol</source>
          <year>2020</year>
          <fpage>37</fpage>
          <lpage>63</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lundberg</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A unified approach to interpreting model predictions</article-title>
          <source>arXiv</source>
          <year>2017</year>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="book">
          <source>Handbook of Game Theory with Economic Applications Volume 2</source>
          <year>1992</year>
          <publisher-loc>Amsterdam, Netherlands</publisher-loc>
          <publisher-name>Elsevier</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jurafsky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <source>Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition</source>
          <year>2000</year>
          <publisher-loc>Hoboken, New Jersey, United States</publisher-loc>
          <publisher-name>Pearson Prentice Hall</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kwapil</surname>
              <given-names>TR</given-names>
            </name>
          </person-group>
          <article-title>Social anhedonia as a predictor of the development of schizophrenia-spectrum disorders</article-title>
          <source>J Abnormal Psychol</source>
          <year>1998</year>
          <month>11</month>
          <volume>107</volume>
          <issue>4</issue>
          <fpage>558</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1037/0021-843X.107.4.558</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ringer</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Lysaker</surname>
              <given-names>PH</given-names>
            </name>
          </person-group>
          <article-title>Anger expression styles in schizophrenia spectrum disorders: associations with anxiety, paranoia, emotion recognition, and trauma history</article-title>
          <source>J Nerv Ment Dis</source>
          <year>2014</year>
          <month>12</month>
          <volume>202</volume>
          <issue>12</issue>
          <fpage>853</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1097/NMD.0000000000000212</pub-id>
          <pub-id pub-id-type="medline">25386763</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chua</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chong</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Subramaniam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mahendran</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The impact of emotion dysregulation on positive and negative symptoms in schizophrenia spectrum disorders: a systematic review</article-title>
          <source>J Clin Psychol</source>
          <year>2020</year>
          <month>04</month>
          <volume>76</volume>
          <issue>4</issue>
          <fpage>612</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1002/jclp.22915</pub-id>
          <pub-id pub-id-type="medline">31909833</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuperberg</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Caplan</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Language dysfunction in schizophrenia</article-title>
          <source>Neuropsychiatry</source>
          <year>2003</year>
          <publisher-loc>Philadelphia</publisher-loc>
          <publisher-name>Lippincott Williams and Wilkins</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Palmese</surname>
              <given-names>LB</given-names>
            </name>
            <name name-style="western">
              <surname>DeGeorge</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Ratliff</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Srihari</surname>
              <given-names>VH</given-names>
            </name>
            <name name-style="western">
              <surname>Wexler</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Krystal</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Tek</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Insomnia is frequent in schizophrenia and associated with night eating and obesity</article-title>
          <source>Schizophr Res</source>
          <year>2011</year>
          <month>12</month>
          <volume>133</volume>
          <issue>1-3</issue>
          <fpage>238</fpage>
          <lpage>43</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21856129"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.schres.2011.07.030</pub-id>
          <pub-id pub-id-type="medline">21856129</pub-id>
          <pub-id pub-id-type="pii">S0920-9964(11)00428-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC5581545</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chandra Guntuku</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Buffone</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jaidka</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Eichstaedt</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
          </person-group>
          <article-title>Understanding and measuring psychological stress using social media</article-title>
          <source>Proceedings of the International AAAI Conference on Web and Social Media</source>
          <year>2019</year>
          <conf-name>International AAAI Conference on Web and Social Media</conf-name>
          <conf-date>Jun 11-14, 2019</conf-date>
          <conf-loc>Munich, Germany</conf-loc>
          <pub-id pub-id-type="doi">10.1609/icwsm.v13i01.3223</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ben-Zeev</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Scherer</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>Next-generation psychiatric assessment: using smartphone sensors to monitor behavior and mental health</article-title>
          <source>Psychiatr Rehabil J</source>
          <year>2015</year>
          <month>09</month>
          <volume>38</volume>
          <issue>3</issue>
          <fpage>218</fpage>
          <lpage>26</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25844912"/>
          </comment>
          <pub-id pub-id-type="doi">10.1037/prj0000130</pub-id>
          <pub-id pub-id-type="medline">25844912</pub-id>
          <pub-id pub-id-type="pii">2015-14736-001</pub-id>
          <pub-id pub-id-type="pmcid">PMC4564327</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Kulkarni</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Van Meter</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Rizvi</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Arenare</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Utilizing machine learning on internet search activity to support the diagnostic process and relapse detection in young individuals with early psychosis: feasibility study</article-title>
          <source>JMIR Ment Health</source>
          <year>2020</year>
          <month>09</month>
          <day>01</day>
          <volume>7</volume>
          <issue>9</issue>
          <fpage>e19348</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2020/9/e19348/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19348</pub-id>
          <pub-id pub-id-type="medline">32870161</pub-id>
          <pub-id pub-id-type="pii">v7i9e19348</pub-id>
          <pub-id pub-id-type="pmcid">PMC7492982</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eisner</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bucci</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Emsley</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Barrowclough</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Drake</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Feasibility of using a smartphone app to assess early signs, basic symptoms and psychotic symptoms over six months: a preliminary report</article-title>
          <source>Schizophr Res</source>
          <year>2019</year>
          <month>06</month>
          <volume>208</volume>
          <fpage>105</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0920-9964(19)30126-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.schres.2019.04.003</pub-id>
          <pub-id pub-id-type="medline">30979665</pub-id>
          <pub-id pub-id-type="pii">S0920-9964(19)30126-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC6551369</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zulueta</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Piscitello</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rasic</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Easter</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Babu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Langenecker</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>McInnis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ajilore</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Leow</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Predicting mood disturbance severity with mobile phone keystroke metadata: a BiAffect digital phenotyping study</article-title>
          <source>J Med Internet Res</source>
          <year>2018</year>
          <month>07</month>
          <day>20</day>
          <volume>20</volume>
          <issue>7</issue>
          <fpage>e241</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2018/7/e241/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.9775</pub-id>
          <pub-id pub-id-type="medline">30030209</pub-id>
          <pub-id pub-id-type="pii">v20i7e241</pub-id>
          <pub-id pub-id-type="pmcid">PMC6076371</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Appelbaum</surname>
              <given-names>PS</given-names>
            </name>
          </person-group>
          <article-title>Beyond googling: the ethics of using patients' electronic footprints in psychiatric practice</article-title>
          <source>Harv Rev Psychiatry</source>
          <year>2017</year>
          <volume>25</volume>
          <issue>4</issue>
          <fpage>170</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1097/hrp.0000000000000145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rieger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gaines</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barnett</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Baldassano</surname>
              <given-names>CF</given-names>
            </name>
            <name name-style="western">
              <surname>Connolly Gibbons</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Crits-Christoph</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Psychiatry outpatients’ willingness to share social media posts and smartphone data for research and clinical purposes: survey study</article-title>
          <source>JMIR Form Res</source>
          <year>2019</year>
          <month>08</month>
          <day>29</day>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>e14329</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2019/3/e14329/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/14329</pub-id>
          <pub-id pub-id-type="medline">31493326</pub-id>
          <pub-id pub-id-type="pii">v3i3e14329</pub-id>
          <pub-id pub-id-type="pmcid">PMC6754680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Van Meter</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Arenare</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Abowd</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Designing a clinician-facing tool for using insights from patients’ social media activity: iterative co-design approach</article-title>
          <source>JMIR Ment Health</source>
          <year>2020</year>
          <month>08</month>
          <day>12</day>
          <volume>7</volume>
          <issue>8</issue>
          <fpage>e16969</fpage>
          <pub-id pub-id-type="doi">10.2196/16969</pub-id>
          <pub-id pub-id-type="medline">32784180</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ernala</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Saket</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Weir</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Arenare</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Van Meter</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Birnbaum</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Abowd</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Clinician perspectives on using computational mental health insights from patients’ social media activities: design and qualitative evaluation of a prototype</article-title>
          <source>JMIR Ment Health</source>
          <year>2021</year>
          <month>11</month>
          <day>16</day>
          <volume>8</volume>
          <issue>11</issue>
          <fpage>e25455</fpage>
          <pub-id pub-id-type="doi">10.2196/25455</pub-id>
          <pub-id pub-id-type="medline">34783667</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Terrasse</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gorin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sisti</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Social media, e‐health, and medical ethics</article-title>
          <source>Hastings Center Report</source>
          <year>2019</year>
          <month>02</month>
          <day>21</day>
          <volume>49</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.1002/hast.975</pub-id>
          <pub-id pub-id-type="medline">30790306</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thieme</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Belgrave</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sano</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Doherty</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Machine learning applications</article-title>
          <source>Interactions</source>
          <year>2020</year>
          <access-date>2022-11-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.scss.tcd.ie/gavin.doherty/papers/Interactions-MLinMH.pdf">https://www.scss.tcd.ie/gavin.doherty/papers/Interactions-MLinMH.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Househ</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grainger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Petersen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bamidis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Merolli</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Balancing between privacy and patient needs for health information in the age of participatory health and social media: a scoping review</article-title>
          <source>Yearb Med Inform</source>
          <year>2018</year>
          <month>08</month>
          <volume>27</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1055/s-0038-1641197</pub-id>
          <pub-id pub-id-type="medline">29681040</pub-id>
          <pub-id pub-id-type="pmcid">PMC6115243</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
