<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMH</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Ment Health</journal-id>
      <journal-title>JMIR Mental Health</journal-title>
      <issn pub-type="epub">2368-7959</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v11i1e57306</article-id>
      <article-id pub-id-type="pmid">39042893</article-id>
      <article-id pub-id-type="doi">10.2196/57306</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Exploring the Efficacy of Large Language Models in Summarizing Mental Health Counseling Sessions: Benchmark Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Tal</surname>
            <given-names>Amir</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shah</surname>
            <given-names>Habib</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Adhikary</surname>
            <given-names>Prottay Kumar</given-names>
          </name>
          <degrees>BTech</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3025-9721</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Srivastava</surname>
            <given-names>Aseem</given-names>
          </name>
          <degrees>BTech</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1239-0707</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Kumar</surname>
            <given-names>Shivani</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8047-8746</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Singh</surname>
            <given-names>Salam Michael</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2249-6081</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Manuja</surname>
            <given-names>Puneet</given-names>
          </name>
          <degrees>BTech, MBA</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-5969-9047</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Gopinath</surname>
            <given-names>Jini K</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0005-5646-735X</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Krishnan</surname>
            <given-names>Vijay</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4422-4036</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Gupta</surname>
            <given-names>Swati Kedia</given-names>
          </name>
          <degrees>MPhil, PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5895-4382</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Deb</surname>
            <given-names>Koushik Sinha</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9546-9036</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Chakraborty</surname>
            <given-names>Tanmoy</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Electrical Engineering</institution>
            <institution>Indian Institute of Technology Delhi</institution>
            <addr-line>IIT Delhi Main Road, IIT Campus, Hauz Khas</addr-line>
            <addr-line>New Delhi, 110016</addr-line>
            <country>India</country>
            <phone>91 26591076 ext 011</phone>
            <email>tanchak@iitd.ac.in</email>
          </address>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0210-0369</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Electrical Engineering</institution>
        <institution>Indian Institute of Technology Delhi</institution>
        <addr-line>New Delhi</addr-line>
        <country>India</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computer Science &#38; Engineering</institution>
        <institution>Indraprastha Institute of Information Technology Delhi</institution>
        <addr-line>New Delhi</addr-line>
        <country>India</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>YourDOST</institution>
        <addr-line>Karnataka</addr-line>
        <country>India</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Psychiatry</institution>
        <institution>All India Institute of Medical Sciences</institution>
        <addr-line>Rishikesh</addr-line>
        <country>India</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Psychiatry</institution>
        <institution>All India Institute of Medical Sciences</institution>
        <addr-line>New Delhi</addr-line>
        <country>India</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Yardi School of Artificial Intelligence</institution>
        <institution>Indian Institute of Technology Delhi</institution>
        <addr-line>New Delhi</addr-line>
        <country>India</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Tanmoy Chakraborty <email>tanchak@iitd.ac.in</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>23</day>
        <month>7</month>
        <year>2024</year>
      </pub-date>
      <volume>11</volume>
      <elocation-id>e57306</elocation-id>
      <history>
        <date date-type="received">
          <day>12</day>
          <month>2</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>4</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>21</day>
          <month>5</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>23</day>
          <month>5</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Prottay Kumar Adhikary, Aseem Srivastava, Shivani Kumar, Salam Michael Singh, Puneet Manuja, Jini K Gopinath, Vijay Krishnan, Swati Kedia Gupta, Koushik Sinha Deb, Tanmoy Chakraborty. Originally published in JMIR Mental Health (https://mental.jmir.org), 23.07.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Mental Health, is properly cited. The complete bibliographic information, a link to the original publication on https://mental.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://mental.jmir.org/2024/1/e57306" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Comprehensive session summaries enable effective continuity in mental health counseling, facilitating informed therapy planning. However, manual summarization presents a significant challenge, diverting experts’ attention from the core counseling process. Leveraging advances in automatic summarization to streamline the summarization process addresses this issue by enabling mental health professionals to access concise summaries of lengthy therapy sessions, thereby increasing their efficiency. However, existing approaches often overlook the nuanced intricacies inherent in counseling interactions.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study evaluates the effectiveness of state-of-the-art large language models (LLMs) in selectively summarizing various components of therapy sessions through aspect-based summarization, aiming to benchmark their performance.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We first created Mental Health Counseling-Component–Guided Dialogue Summaries, a benchmarking data set that consists of 191 counseling sessions with summaries focused on 3 distinct counseling components (also known as counseling aspects). Next, we assessed the capabilities of 11 state-of-the-art LLMs in addressing the task of counseling-component–guided summarization. The generated summaries were evaluated quantitatively using standard summarization metrics and verified qualitatively by mental health professionals.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our findings demonstrated the superior performance of task-specific LLMs such as MentalLlama, Mistral, and MentalBART evaluated using standard quantitative metrics such as Recall-Oriented Understudy for Gisting Evaluation (ROUGE)-1, ROUGE-2, ROUGE-L, and Bidirectional Encoder Representations from Transformers Score across all aspects of the counseling components. Furthermore, expert evaluation revealed that Mistral outperformed both MentalLlama and MentalBART across 6 parameters: affective attitude, burden, ethicality, coherence, opportunity costs, and perceived effectiveness. However, these models exhibit a common weakness, with room for improvement in the opportunity costs and perceived effectiveness metrics.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>While LLMs fine-tuned specifically on mental health domain data display better performance based on automatic evaluation scores, expert assessments indicate that these models are not yet reliable for clinical application. Further refinement and validation are necessary before their implementation in practice.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>mental health</kwd>
        <kwd>counseling summarization</kwd>
        <kwd>large language models</kwd>
        <kwd>digital health</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>AI</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Counseling refers to a relationship between a professional counselor and individuals, families, or other groups that empowers the clients to achieve mental health, wellness, education, and career goals. Specifically, in individuals with psychological or interpersonal difficulties, mental health counseling may be seen as a key helping intervention. Counseling sessions embrace a client-centered approach, fostering an environment of trust and exploration. These sessions delve deep into personal experiences, where clients share intimate details while therapists navigate the dialogue to cultivate a safe and supportive space for healing. Discussions within these sessions span a wide range of topics, from recent life events to profound introspections, all of which contribute to the therapeutic journey. An important aspect of the counseling process lies in the documentation of counseling notes (summary of the entire session), which is essential for summarizing client stressors and therapy principles. Session notes are pivotal in tracking progress and in guiding future sessions. However, capturing the intricacies of these conversations poses a formidable challenge, demanding training, expertise, and experience of mental health professionals. These summaries distill key insights, including symptom and history (SH) exploration, patient discovery (PD), and reflection, while filtering out nonessential details. However, the need for meticulous recordkeeping can sometimes detract from the primary focus of therapy. Maintaining a seamless flow of conversation is paramount in effective therapy, where any disruption can impede progress. To streamline this process and ensure continuity, automation emerges as a promising solution for the counseling summarization task. While advances in artificial intelligence (AI) have revolutionized document summarization, the application of these technologies to mental health counseling remains relatively unexplored.</p>
        <p>Previous studies [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>] have recognized the potential of counseling summarization in optimizing therapeutic outcomes. However, existing models often overlook the unique nuances inherent in mental health interactions. Standard counseling dialogues, using reflective listening, involve identifying current issues; developing a biopsychosocial conceptualization, including past traumas and coping strategies; and chalking out treatment plans. The counseling dialogues also include discussion on between-session issues as well as crises, if any. An effective counseling summary should selectively capture information pertinent to each of these categories while eliminating extraneous details.</p>
        <p>Despite the demonstrated capabilities of large language models (LLMs) in various domains, research in mental health counseling summarization is scarce. One major obstacle is the lack of specialized data sets tailored to counseling contexts. To bridge this gap, we embarked on a two-pronged approach: (1) creating a novel counseling-component–guided summarization data set, called Mental Health Counseling-Component–Guided Dialogue Summaries (MentalCLOUDS); and (2) evaluating state-of-the-art LLMs on the task of counseling-component–guided summarization. Through these efforts, we aim to propel the integration of AI technologies into mental health practice, ultimately enhancing the quality and accessibility of therapeutic interventions.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <sec>
          <title>Overview</title>
          <p>Summarizing counseling conversations enhances session continuity and facilitates the development of comprehensive therapy plans. However, analyzing these interactions manually is an arduous task. To address this challenge, advances in AI and natural language processing, particularly in summarization techniques, offer a promising solution. Summarization tasks can be approached via an extractive [<xref ref-type="bibr" rid="ref4">4</xref>] or an abstractive [<xref ref-type="bibr" rid="ref5">5</xref>] viewpoint. Extractive summarization involves identifying the most relevant sentences from an article and systematically organizing them. Given the simplicity of the approach, the resultant extractive summaries are often less fluent. By contrast, abstractive summarization extracts important aspects of a text and generates more coherent summaries. By using summarization, therapists can access recaps of sessions, sparing them the need to sift through lengthy dialogues. While summarization has been a long-studied problem in natural language processing [<xref ref-type="bibr" rid="ref6">6</xref>], recent attention has shifted toward aspect-based summarization, a method that focuses on generating summaries pivoted on specific points of interest within documents.</p>
          <p>Chen and Verma [<xref ref-type="bibr" rid="ref1">1</xref>] proposed a retrieval-based medical document summarization approach in which the user query is fine-tuned using a medical ontology, but their method is limited due to its overall primitive design. Konovalov et al [<xref ref-type="bibr" rid="ref7">7</xref>] highlight the importance of identifying emotional reactions and “early counseling” components. Strauss et al [<xref ref-type="bibr" rid="ref8">8</xref>] used machine learning approaches to automate the analysis of clinical forms, and they envision using machine learning in mental health to a certain extent. Furthermore, research on major depressive disorder [<xref ref-type="bibr" rid="ref9">9</xref>] underscores the significance of identifying crucial indicators from patient conversations, such as age, anxiety levels, and long episode duration, in the choice of the appropriate level of antidepressant medication, guiding subsequent sessions and prescriptions. Subsequently, the effectiveness of the prescribed antidepressants is monitored to assess the patient’s response.</p>
          <p>This concept identifies crucial indicators from the patient’s conversations with the therapist and guides subsequent follow-up sessions based on the patient’s history of interactions and prescriptions. Deep learning approaches, such as the use of recurrent neural networks and long short-term memory, have been used to predict 13 predefined mental illnesses based on neuropsychiatric notes that contain 300 words each, on average, about the patient’s present illness and events associated with it, followed by a psychiatric review system that mentions the mental illness related to the patient [<xref ref-type="bibr" rid="ref10">10</xref>]. Chen et al [<xref ref-type="bibr" rid="ref11">11</xref>] proposed an extractive summarization approach using the Bidirectional Encoder Representations from Transformers (BERT) model [<xref ref-type="bibr" rid="ref12">12</xref>] to reduce physicians’ efforts in analyzing tedious amounts of diagnosis reports. However, there remains a notable gap in effectively capturing medical information in session summaries.</p>
          <p>In addition, some contemporary works used authentic mental health records to create synthetic data sets [<xref ref-type="bibr" rid="ref13">13</xref>]. Afzal et al [<xref ref-type="bibr" rid="ref14">14</xref>] reported the summarization of medical documents to identify PICO (Population, Intervention, Comparison, and Outcomes) elements. Manas et al [<xref ref-type="bibr" rid="ref15">15</xref>] proposed an unsupervised abstractive summarization in which domain knowledge from the Patient Health Questionnaire-9 was used to build knowledge graphs to filter relevant utterances. A 2-step summarization was devised by Zhang et al [<xref ref-type="bibr" rid="ref16">16</xref>] wherein partial summaries were initially consolidated, and the final summary was generated by fusing these chunks. Furthermore, Zafari and Zulkernine [<xref ref-type="bibr" rid="ref17">17</xref>] demonstrated a web-based application built using information extraction and annotation tailored to the medical domain.</p>
          <p>For dialogue summarization, abstractive summarization has been the de facto standard due to its ability to capture critical points coherently. Nallapati et al [<xref ref-type="bibr" rid="ref18">18</xref>] used an encoder-decoder–based abstractive summarization method, which was further improved via the attention mechanism [<xref ref-type="bibr" rid="ref19">19</xref>]. Subsequently, See et al [<xref ref-type="bibr" rid="ref20">20</xref>] introduced a hybrid approach of extractive and abstractive summarization. Chen and Bansal [<xref ref-type="bibr" rid="ref2">2</xref>] proposed a reinforcement learning-based approach as a mixture of extractive and abstractive approaches for summarization wherein emphasis is given to redundancy reduction in the utterances extracted from the conversation. Recent research reveals the dependence of specific utterances in the extraction of salient sentences from the conversation utterances. In this regard, Narayan et al [<xref ref-type="bibr" rid="ref3">3</xref>] analyzed topic distribution based on latent Dirichlet allocation [<xref ref-type="bibr" rid="ref21">21</xref>]. Subsequently, Song et al [<xref ref-type="bibr" rid="ref22">22</xref>] segregated utterances into 3 labels: problem description, diagnosis, and other. In medical counseling, Quiroz et al [<xref ref-type="bibr" rid="ref23">23</xref>] and Krishna et al [<xref ref-type="bibr" rid="ref24">24</xref>] adopted the method of selecting significant utterances for summarizing medical conversations.</p>
          <p>In aspect-based summarization, instead of an overall summary of the entire document, summaries at different aspect levels are made based on specific points of interest. These aspects could be movie reviews [<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref28">28</xref>] or summarization guided by different domains [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>] where the documents or the segments of the documents are tagged with these aspects. Hayashi et al [<xref ref-type="bibr" rid="ref31">31</xref>] released a benchmarking data set on multidomain aspect-based summarization where they annotated 20 different domains as aspects using the section titles and boundaries of each article chosen from Wikipedia. Frermann et al [<xref ref-type="bibr" rid="ref29">29</xref>] reported an aspect-based summarization of the news domain. Their method can segment documents by aspect, and the model can generalize from the synthetic data to natural documents. The study further revealed the models’ efficacy in summarizing long documents. Recently, aspect-based summarization has garnered considerable traction; however, the data set is limited. Yang et al [<xref ref-type="bibr" rid="ref32">32</xref>] released a large-scale, high-quality data set on aspect-based summarization from Wikipedia. The data set contains approximately 3.7 million instances covering approximately 1 million aspects sourced from 2 million Wikipedia pages. Apart from releasing the data set, the authors also benchmarked it on the Longformer-Encoder-Decoder [<xref ref-type="bibr" rid="ref33">33</xref>] model where they performed zero-shot, few-shot, and fine-tuning on 7 downstream domains where data are scarce. Joshi et al [<xref ref-type="bibr" rid="ref34">34</xref>] address the general summarization of medical dialogues. 
They proposed combining extractive and abstractive methods that leverage the independent and distinctive local structures formed during a patient’s medical history compilation. Liu et al [<xref ref-type="bibr" rid="ref35">35</xref>] reported a topic-based summarization of general medical domains pertaining to topics such as swelling, headache, chest pain, and dizziness. Their encoder-decoder model tries to generate 1 symptom (topic) at a time. Besides, work on formalizing the conversation text has been reported in the study by Kazi and Kahanda [<xref ref-type="bibr" rid="ref36">36</xref>]. This work treats the formalization of the case notes from digital transcripts of physician-patient conversations as a summarization task. The method involves 2 steps: prediction of the electronic health record categories and formal text generation. Gundogdu et al [<xref ref-type="bibr" rid="ref37">37</xref>] used a BERT-based sequence-to-sequence model for summarizing clinical radiology reports. The experimental results indicated that at least 76% of their summary generations were as accurate as those generated by radiologists. There is also a report on topic-guided dialogue summarization for clinical physician-patient conversations [<xref ref-type="bibr" rid="ref38">38</xref>]. The approach first learns the topic structure of the dialogues and uses these topics to generate the summaries in the desired format (eg, the subjective, objective, assessment, and plan format). Zhang et al [<xref ref-type="bibr" rid="ref39">39</xref>] proposed a method for factually consistent summarization of clinical dialogues. This method involves extracting factual statements and encoding them into the dialogue. In addition, a dialogue segmenter is trained to segment the dialogues based on topic switching, which enhances the model’s overall discourse awareness. 
Chintagunta et al [<xref ref-type="bibr" rid="ref40">40</xref>] used GPT-3 [<xref ref-type="bibr" rid="ref41">41</xref>] to generate training examples for medical dialogue summarization tasks. Recently, there have been reports of LLMs being used in medical dialogue summarization to expedite diagnosis by focusing on relevant medical facts, thereby reducing screening time [<xref ref-type="bibr" rid="ref42">42</xref>]. The authors conducted benchmarking on GPT-3.5, Bidirectional and Auto-Regressive Transformer (BART) [<xref ref-type="bibr" rid="ref43">43</xref>], and BERT for Summarization [<xref ref-type="bibr" rid="ref44">44</xref>]. The study indicated that GPT-3.5 generated more accurate and human-aligned responses than the other 2 models. Another study [<xref ref-type="bibr" rid="ref45">45</xref>] demonstrated the effectiveness of LLMs in clinical text summarization across 4 different tasks: physician-patient dialogue, radiology reports, patient questions, and progress notes. The quantitative analysis revealed that the summaries generated by the adapted LLMs were comparable, or even superior, in quality to those of the human experts in terms of conciseness, correctness, and completeness. Singh et al [<xref ref-type="bibr" rid="ref46">46</xref>] used open-source LLMs to extract and summarize suicide ideation indicators from social media texts to expedite mental health interventions.</p>
        </sec>
        <sec>
          <title>Opportunities</title>
          <p>The aforementioned previous works either did not focus on aspect-based summarization or reported on general clinical discussions of common symptoms and conditions (eg, cough, cold, and fever). However, there are still avenues to be explored in the aspect-based summarization of mental health therapy conversations, considering that mental health is a pressing global issue requiring urgent consideration. These therapy conversations encompass several counseling components, including patient information, past symptoms, diagnosis history, reflection, and the therapist’s action plans. Focusing the summaries on these counseling components would facilitate targeted and focused summaries, significantly reducing the time and effort and leading to more effective therapy overall. In this direction, our work is motivated by the study conducted by Srivastava et al [<xref ref-type="bibr" rid="ref47">47</xref>], which reported on a summarization-based counseling technique from therapist-client conversations. They released a conversation data set that is structured with the core components of psychotherapy about SH identification or the discovery of the patient’s behavior. The authors proposed an encoder-decoder model based on Text-to-Text Transfer Transformer (T5) [<xref ref-type="bibr" rid="ref48">48</xref>] for their counseling-component–guided summarization model. However, a single, generic summary is generated in the work, and no focus is given to generating aspect-based summaries. Consequently, we extended the work by using the counseling components, namely SH exploration, PD, and reflection, into an aspect-based summarization framework. To this end, we created MentalCLOUDS, a data set that incorporates summaries aligned with the distinct counseling components. We also explored the efficacy of the state-of-the-art LLMs (encoder-decoder as well as decoder-only models) for the summarization of counseling dialogues in this work.</p>
        </sec>
        <sec>
          <title>Taxonomy</title>
          <p>On the basis of the survey of related works on summarization in the medical domain in general and in mental health in particular, we present a taxonomy of task formulations for summarization tasks in the medical domain (<xref rid="figure1" ref-type="fig">Figure 1</xref> [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref68">68</xref>]). In general, medical text summarization is divided into research articles [<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref52">52</xref>], reports, patient health questions, electronic health records, and dialogue summarization. Report summarization encompasses the summarization of reports, such as impressions or summarizations of radiology findings [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref55">55</xref>]. Patient health question summarization involves summarizing informal, nontechnical, and lengthy patient questions into technically sound and concise ones [<xref ref-type="bibr" rid="ref56">56</xref>-<xref ref-type="bibr" rid="ref59">59</xref>]. 
Electronic health record summarization includes the summarization of patient notes such as clinical progress notes [<xref ref-type="bibr" rid="ref60">60</xref>-<xref ref-type="bibr" rid="ref63">63</xref>] and discharge notes [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref64">64</xref>-<xref ref-type="bibr" rid="ref66">66</xref>]. Our work focuses on the abstractive dialogue summarization of mental health counseling conversations, specifically targeting the counseling aspects. In addition, the survey includes general medical dialogue summarization [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref45">45</xref>] and mental health dialogue summarization [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref67">67</xref>,<xref ref-type="bibr" rid="ref68">68</xref>]. Of note, this taxonomy does not represent the global scenario but rather provides a comprehensive depiction based on the aforementioned survey.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Taxonomy of summarization methods in the medical domain.</p>
            </caption>
            <graphic xlink:href="mental_v11i1e57306_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Challenges</title>
          <p>Mental health counseling conversations often involve sensitive and confidential information. There is an expectation of empathetic and reflective responses from the therapist and action plans based on which the therapy is conducted. Generative AI–based counselors are susceptible to generating insensitive or incorrect suggestions and lacking empathy in their responses, which can negatively impact the therapy process. Moreover, the components or aspects of counseling sessions are subjective, and a counseling conversation can have multiple aspects. Therefore, the scope of the aspect-based summarization is limited to the specific annotated aspects. However, annotating these aspects requires expert manual intervention, which is costly both in terms of human resources and the financial perspective.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview of the Proposed Data Set: MentalCLOUDS</title>
        <p>To evaluate the performance of diverse summarization systems across various aspects of counseling interactions, we expanded upon the Mental Health Summarization (MEMO) data set [<xref ref-type="bibr" rid="ref47">47</xref>]. Comprising 11,543 utterances extracted from 191 counseling sessions involving therapists and patients, this data set draws from publicly accessible platforms such as YouTube. Embracing a heterogeneous demographic spectrum with distinctive mental health concerns and diverse therapists, the data set facilitates the formulation of a comprehensive and inclusive approach for researchers. Using preprocessed transcriptions derived from counseling videos, the constituent dialogues within the data set exhibit a dyadic structure, exclusively featuring patients and therapists as interlocutors. Within each conversation, 3 pivotal counseling components (aspects) emerge: SH exploration, PD, and reflective utterances.</p>
        <p>Our study aims to capture the essence of each aforementioned counseling component, embarking on the creation of 3 distinct summaries for a single dialogue, with each summary tailored to a specific counseling component. Expanding upon the MEMO data set, we augmented it with annotated dialogue summaries corresponding to the 3 identified components. Collaborating closely with a team of leading mental health experts (for their details, refer to the Qualitative Assessment by Experts subsection), we crafted annotation guidelines and subjected the summary annotations to rigorous validation processes. We call the resultant data set MentalCLOUDS. We highlight its key statistics in <xref ref-type="table" rid="table1">Table 1</xref> and <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Statistics of the Mental Health Counseling-Component–Guided Dialogue Summaries data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="110"/>
            <col width="110"/>
            <col width="120"/>
            <col width="110"/>
            <col width="110"/>
            <col width="110"/>
            <col width="110"/>
            <col width="110"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>Set</td>
                <td>Dialogues (n=191), n (%)</td>
                <td>Utterances (n=11,543), n (%)</td>
                <td>Utterances per dialogue, mean (SD)</td>
                <td>Patient utterances (n=5722), n (%)</td>
                <td>Therapist utterances (n=5814), n (%)</td>
                <td>SH<sup>a</sup> utterances (n=2379), n (%)</td>
                <td>PD<sup>b</sup> utterances (n=5428), n (%)</td>
                <td>Reflective utterances (n=1242), n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Training</td>
                <td>131 (68.59)</td>
                <td>8342 (72.3)</td>
                <td>63.68 (38.44)</td>
                <td>4124 (72.1)</td>
                <td>4211 (72.4)</td>
                <td>1882 (79.1)</td>
                <td>3826 (70.5)</td>
                <td>884 (71.2)</td>
              </tr>
              <tr valign="top">
                <td>Validation</td>
                <td>21 (10.99)</td>
                <td>1191 (10.3)</td>
                <td>56.71 (27.06)</td>
                <td>594 (10.4)</td>
                <td>597 (10.3)</td>
                <td>206 (8.7)</td>
                <td>445 (8.2)</td>
                <td>146 (11.8)</td>
              </tr>
              <tr valign="top">
                <td>Test</td>
                <td>39 (20.42)</td>
                <td>2010 (17.4)</td>
                <td>51.53 (39.96)</td>
                <td>1004 (17.5)</td>
                <td>1006 (17.3)</td>
                <td>291 (12.2)</td>
                <td>1157 (21.3)</td>
                <td>212 (17.1)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>SH: symptom and history.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>PD: patient discovery.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Distribution of summary lengths in the Mental Health Counseling-Component–Guided Dialogue Summaries (MentalCLOUDS) data set.</p>
          </caption>
          <graphic xlink:href="mental_v11i1e57306_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Annotation Process</title>
        <sec>
          <title>Guidelines</title>
          <p>Conversations in counseling situations can be challenging, given the sensitive nature of the information shared. A therapist’s reflective and open attitude can facilitate this expression. This dynamic is reinforced by the proposed MentalCLOUDS data set. This data set distinguishes the utterances dedicated to symptom exploration, discovering the history of mental health issues and patient behavior, as well as providing insights into past narratives, thereby shaping the patient’s present circumstances. These nuanced elements form the core of our identified counseling components. To improve the richness of the data set, we collaborated with mental health experts to formulate a set of annotation guidelines [<xref ref-type="bibr" rid="ref69">69</xref>]. Furthermore, these guidelines serve as a comprehensive framework by which annotators can focus their attention on particular aspects of the conversation that are essential for producing summaries that are customized for each counseling component. By adhering to these guidelines, the therapeutic techniques are captured in the annotations. This ensures that the resulting summaries are concise yet rich in informative content for the specific component.</p>
        </sec>
        <sec>
          <title>Psychotherapy Elements</title>
          <p>Within the realm of mental health therapy sessions, distinct counseling components play a pivotal role in facilitating successful interventions. The MentalCLOUDS data set serves as a valuable resource, furnishing meticulously labeled utterances that encompass 3 fine-grained components [<xref ref-type="bibr" rid="ref47">47</xref>]:</p>
          <list list-type="bullet">
            <list-item>
              <p>SH: this facet encapsulates utterances teeming with insightful information crucial for the therapist’s nuanced assessment of the patient’s situation.</p>
            </list-item>
            <list-item>
              <p>PD: patients entering counseling sessions often bring intricate thoughts to the fore. Therapists, in turn, endeavor to establish therapeutic connections, creating a conducive environment for patients to articulate and unravel their thoughts. Such utterances by the therapist that encourage patients to reveal their concerns lie in this category.</p>
            </list-item>
            <list-item>
              <p>Reflecting: therapists use concise utterances, allowing ample space for patients to share their life stories and events. Encouraging patient narratives, therapists may also use hypothetical scenarios to evaluate actions and enhance understanding.</p>
            </list-item>
          </list>
          <p>When crafting a summary for a dialogue <italic>D</italic>, aligned with a specific counseling component <italic>C</italic>, our primary focus rests on utterances marked with <italic>C</italic> within <italic>D</italic> in the MEMO data set. Consequently, we derived 3 distinct counseling summaries for each counseling component within a single session to create the MentalCLOUDS data set. <xref ref-type="table" rid="table1">Table 1</xref> shows the data statistics, where a balanced distribution of patient and therapist utterances within the data set is evident. Notably, PD emerges as the prevailing label in the data set, highlighting patients’ inclination to discuss ancillary topics rather than focusing solely on their mental health concerns when prompted to share their experiences. By contrast, reflecting emerges as the least tagged label in this comprehensive analysis.</p>
        </sec>
      </sec>
      <sec>
        <title>Benchmarking</title>
        <p>In recent years, the spotlight on LLMs has intensified, captivated by their extraordinary performance across diverse applications. From classification tasks such as emotion recognition [<xref ref-type="bibr" rid="ref70">70</xref>] to generative problems such as response generation [<xref ref-type="bibr" rid="ref71">71</xref>], these models have proven their versatility. In this paper, our focus is directed toward evaluating their capability in the domain of counseling summarization, specifically using MentalCLOUDS. In our comprehensive analysis, we leveraged 11 state-of-the-art pretrained LLM architectures, including a mix of general-purpose and specialized models. These models are considered to carefully assess their performance concerning each facet of the counseling-component summaries. We explain each of these systems in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
        <p>This is to highlight that all baseline models are transformer based, and computational complexities associated with the transformer-based architectures while being trained or fine-tuned involve a computational cost of <italic>O</italic>(<italic>L</italic> × <italic>N</italic><sup>2</sup> × <italic>D</italic>), where <italic>N</italic> represents the sequence length, <italic>D</italic> denotes the hidden dimension, and <italic>L</italic> signifies the number of transformer layers. As we maintain a constant number of layers across all training steps, the computational complexity simplifies to <italic>O</italic>(<italic>N</italic><sup>2</sup> × <italic>D</italic>).</p>
        <p>Moreover, our selection of benchmarked models comprises both small language models (SLMs), such as BART, T5, the GPT family, Phi-2, and MentalBART, as well as LLMs such as Flan-T5, Mistral, Llama-2, and MentalLlama. SLMs typically operate within the parameter range of 300 million to 2 billion, whereas LLMs are characterized by a higher parameter count, ranging from 7 billion to 9 billion (as kept in our study). In addition to analyzing the models’ complexity for a better understanding of their applicability, another crucial metric to consider is the model’s runtime. LLMs tend to consume more runtime due to their larger parameter count, while SLMs run quickly but may compromise accuracy. A comprehensive analysis of the models’ runtime is provided in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <boxed-text id="box1" position="float">
          <title>Description of the 11 models evaluated.</title>
          <list list-type="bullet">
            <list-item>
              <p>Bidirectional and Auto-Regressive Transformer (BART) [<xref ref-type="bibr" rid="ref43">43</xref>]: this is a sequence-to-sequence model designed for various natural language processing (NLP) tasks, including text summarization. It uses a transformer architecture with an encoder-decoder structure. It incorporates a denoising autoencoder objective during pretraining, reconstructing the original input from corrupted versions. We used the pretrained base version of the model in our experiments.</p>
            </list-item>
            <list-item>
              <p>Text-To-Text Transfer Transformer (T5) [<xref ref-type="bibr" rid="ref48">48</xref>]: this is a versatile transformer-based model consisting of an encoder-decoder framework with bidirectional transformers. It reframes all NLP tasks as text-to-text tasks, providing a unified approach. T5 learns representations by denoising corrupted input-output pairs. Its encoder captures contextual information while the decoder generates target sequences. The pretrained base version of T5 was used in our experiments.</p>
            </list-item>
            <list-item>
              <p>GPT-2 [<xref ref-type="bibr" rid="ref72">72</xref>]: this is a transformer-based language model that comprises a stack of identical layers, each with a multihead self-attention mechanism and position-wise fully connected feed-forward networks. GPT-2 follows an autoregressive training approach, predicting the next token in a sequence given its context.</p>
            </list-item>
            <list-item>
              <p>GPT-Neo [<xref ref-type="bibr" rid="ref73">73</xref>]: trained from the Pile data set [<xref ref-type="bibr" rid="ref74">74</xref>], GPT-Neo exhibits a similar architecture as GPT-2 except for a few modifications, such as the use of local attention in every other layer with a window size of 256 tokens. In addition, GPT-Neo houses a combination of linear attention [<xref ref-type="bibr" rid="ref75">75</xref>], a mixture of experts [<xref ref-type="bibr" rid="ref76">76</xref>], and axial positional embedding [<xref ref-type="bibr" rid="ref77">77</xref>] to achieve performance comparable to that of larger LLMs, such as GPT-3.</p>
            </list-item>
            <list-item>
              <p>GPT-J [<xref ref-type="bibr" rid="ref78">78</xref>]: this is a transformer model trained using the methodology proposed by Wang [<xref ref-type="bibr" rid="ref78">78</xref>]. It is a GPT-2–like causal language model trained on the Pile data set.</p>
            </list-item>
            <list-item>
              <p>FLAN-T5 [<xref ref-type="bibr" rid="ref79">79</xref>]: this is the instruction fine-tuned version of the T5 model with a particular focus on scaling the number of tasks, scaling the model size, and fine-tuning on chain-of-thought data.</p>
            </list-item>
            <list-item>
              <p>Mistral [<xref ref-type="bibr" rid="ref80">80</xref>]: this is a decoder-based LLM with a sliding-window attention mechanism, where it is trained with an 8k context length and fixed cache size, with a theoretical attention span of 128K tokens. Faster inference and lower cache are ensured by using grouped query attention [<xref ref-type="bibr" rid="ref81">81</xref>].</p>
            </list-item>
            <list-item>
              <p>MentalBART [<xref ref-type="bibr" rid="ref82">82</xref>]: this is an open-source LLM constructed for interpretable mental health analysis with instruction-following capability. The model is fine-tuned using the Interpretable Mental Health Instruction (IMHI) data set [<xref ref-type="bibr" rid="ref82">82</xref>] and is expected to make complex mental health analyses for various mental health conditions.</p>
            </list-item>
            <list-item>
              <p>MentalLlama [<xref ref-type="bibr" rid="ref82">82</xref>]: similar to MentalBART, MentalLlama is the counterpart of the Llama architecture but is trained on the IMHI data set. The model is fine-tuned to integrate the capability of an LLM with domain knowledge in mental health.</p>
            </list-item>
            <list-item>
              <p>Llama-2 [<xref ref-type="bibr" rid="ref83">83</xref>]: this is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine tuning [<xref ref-type="bibr" rid="ref84">84</xref>] and reinforcement learning with human feedback [<xref ref-type="bibr" rid="ref85">85</xref>] to align with human preferences for helpfulness and safety. The model is trained exclusively on publicly available data sets.</p>
            </list-item>
            <list-item>
              <p>Phi-2: this is an extension of Phi-1 [<xref ref-type="bibr" rid="ref86">86</xref>]. Phi-1 is a transformer-based frugal LLM with the largest variant having 1.3 billion parameters. It is trained on textbook-quality data. It emphasizes the quality of the data to compensate for its relatively small number of parameters. Phi-2 has 2.7 billion parameters, which shows comparable performances with other larger LLMs despite its smaller size.</p>
            </list-item>
          </list>
        </boxed-text>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Average runtime of models fine-tuned on Mental Health Counseling-Component–Guided Dialogue Summaries (MentalCLOUDS) for summarization tasks across 3 psychotherapy elements: symptom and history, patient discovery, and reflecting.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="280"/>
            <col width="210"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Variant or parameters</td>
                <td>Time (min)</td>
                <td>GPU<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>BART<sup>b</sup></td>
                <td>Base</td>
                <td>2.27</td>
                <td>A100</td>
              </tr>
              <tr valign="top">
                <td>T5<sup>c</sup></td>
                <td>Base</td>
                <td>18.81</td>
                <td>A100</td>
              </tr>
              <tr valign="top">
                <td>MentalBART</td>
                <td>Base</td>
                <td>5.94</td>
                <td>A100</td>
              </tr>
              <tr valign="top">
                <td>Flan-T5</td>
                <td>Base</td>
                <td>16.56</td>
                <td>A100</td>
              </tr>
              <tr valign="top">
                <td>GPT-2</td>
                <td>124 million</td>
                <td>6.30</td>
                <td>A100</td>
              </tr>
              <tr valign="top">
                <td>GPT-Neo</td>
                <td>1.3 billion</td>
                <td>32.98</td>
                <td>A100</td>
              </tr>
              <tr valign="top">
                <td>GPT-J</td>
                <td>6 billion</td>
                <td>44.69</td>
                <td>A100</td>
              </tr>
              <tr valign="top">
                <td>MentalLlama</td>
                <td>7 billion</td>
                <td>48.27</td>
                <td>RTX A6000+RTX A5000</td>
              </tr>
              <tr valign="top">
                <td>Mistral</td>
                <td>7 billion</td>
                <td>43.86</td>
                <td>RTX A6000+RTX A5000</td>
              </tr>
              <tr valign="top">
                <td>Phi-2</td>
                <td>2.7 billion</td>
                <td>9.38</td>
                <td>A100</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>GPU: graphics processing unit.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>BART: Bidirectional and Auto-Regressive Transformer.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>T5: Text-To-Text Transfer Transformer.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study did not involve any human subject research; hence, we did not seek ethics approval.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>We undertook a comprehensive evaluation of the generated session summaries across various architectures, using a dual approach of quantitative and qualitative assessments.</p>
      <sec>
        <title>Quantitative Assessment</title>
        <sec>
          <title>Overview</title>
          <p>This section reports the aspect-based (psychotherapy element–based) summarization results based on the automatic evaluation scores. Given the generative nature of the task, we used standard summarization evaluation metrics such as Recall-Oriented Understudy for Gisting Evaluation (ROUGE)-1, ROUGE-2, ROUGE-L, and BERT Score (BERTScore) along with their corresponding precision, recall, and <italic>F</italic><sub>1</sub>-score values. As the <italic>F</italic><sub>1</sub>-score accounts for precision and recall, we compared the performance of the LLMs based on <italic>F</italic><sub>1</sub>-score values unless stated otherwise. ROUGE [<xref ref-type="bibr" rid="ref87">87</xref>] assesses the overlap of n-grams (sequences of n consecutive words) between the generated summary and reference summaries. Specifically, this metric measures the number of overlapping units such as n-grams, word sequences, and word pairs in the generated summary evaluated against the gold summary typically created by humans. ROUGE favors the candidate summary with more overlaps with reference summaries. This effectively gives more weight to matching n-grams occurring in multiple reference summaries. This work reports the unigram and bigram ROUGE (namely ROUGE-1 and ROUGE-2) and ROUGE-L evaluations. ROUGE-L takes into account the longest co-occurring n-gram between the candidate and reference summaries. BERTScore [<xref ref-type="bibr" rid="ref88">88</xref>] is harnessed to gauge the semantic coherence between the generated summaries and their ground truths. Notably, in the context of counseling summaries, which are inherently tied to a domain-specific conversation, we embarked on a meticulous qualitative examination of the generated summaries for individual counseling components.</p>
        </sec>
        <sec>
          <title>SH Summarization</title>
          <p><xref ref-type="table" rid="table3">Table 3</xref> reports the automatic evaluation scores of the LLMs on the summarization task for the SH psychotherapy element. MentalLlama outperformed the other LLMs across all automatic evaluation metrics. For the ROUGE-1 metric, MentalLlama achieved an <italic>F</italic><sub>1</sub>-score of 30.86, followed by MentalBART with an <italic>F</italic><sub>1</sub>-score of 28.00. In terms of the ROUGE-2 metric, Mistral was comparable to MentalLlama with a difference of just 0.90 in the <italic>F</italic><sub>1</sub>-score values. Similarly, for the ROUGE-L metric, MentalLlama surpassed Mistral by a difference of 2.93 in the <italic>F</italic><sub>1</sub>-score values.</p>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Results obtained on Mental Health Counseling-Component–Guided Dialogue Summaries (MentalCLOUDS) for the summarization task on the symptom and history psychotherapy element.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="110"/>
              <col width="90"/>
              <col width="70"/>
              <col width="70"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <col width="0"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <thead>
                <tr valign="top">
                  <td>Model</td>
                  <td colspan="3">ROUGE<sup>a</sup>-1</td>
                  <td colspan="4">ROUGE-2</td>
                  <td colspan="3">ROUGE-L</td>
                  <td colspan="3">BERTScore<sup>b</sup></td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                  <td>Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                  <td colspan="2">Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                  <td>Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>BART<sup>c</sup></td>
                  <td>12.91</td>
                  <td>28.84</td>
                  <td>16.26</td>
                  <td>1.88</td>
                  <td>5.07</td>
                  <td>2.47</td>
                  <td colspan="2">10.21</td>
                  <td>23.97</td>
                  <td>13.19</td>
                  <td>85.81</td>
                  <td>85.81</td>
                  <td>85.81</td>
                </tr>
                <tr valign="top">
                  <td>T5<sup>d</sup></td>
                  <td>22.16</td>
                  <td>19.81</td>
                  <td>19.74</td>
                  <td>2.18</td>
                  <td>1.78</td>
                  <td>1.85</td>
                  <td colspan="2">16.12</td>
                  <td>14.51</td>
                  <td>14.36</td>
                  <td>85.38</td>
                  <td>85.38</td>
                  <td>85.38</td>
                </tr>
                <tr valign="top">
                  <td>MentalBART</td>
                  <td>30.31</td>
                  <td>29.02</td>
                  <td>28.00</td>
                  <td>6.06</td>
                  <td>5.29</td>
                  <td>5.46</td>
                  <td colspan="2">20.85</td>
                  <td>20.34</td>
                  <td>19.40</td>
                  <td>88.34</td>
                  <td>88.34</td>
                  <td>88.34</td>
                </tr>
                <tr valign="top">
                  <td>Flan-T5</td>
                  <td>21.45</td>
                  <td>
                    <italic>33.15</italic>
                    <sup>e</sup>
                  </td>
                  <td>24.80</td>
                  <td>3.84</td>
                  <td>6.08</td>
                  <td>4.54</td>
                  <td colspan="2">17.15</td>
                  <td>26.53</td>
                  <td>19.76</td>
                  <td>86.94</td>
                  <td>86.94</td>
                  <td>86.94</td>
                </tr>
                <tr valign="top">
                  <td>GPT-2</td>
                  <td>6.59</td>
                  <td>14.62</td>
                  <td>8.91</td>
                  <td>1.06</td>
                  <td>2.34</td>
                  <td>1.42</td>
                  <td colspan="2">5.12</td>
                  <td>11.37</td>
                  <td>6.93</td>
                  <td>83.65</td>
                  <td>83.65</td>
                  <td>83.65</td>
                </tr>
                <tr valign="top">
                  <td>GPT-Neo</td>
                  <td>9.97</td>
                  <td>19.91</td>
                  <td>13.01</td>
                  <td>1.01</td>
                  <td>2.30</td>
                  <td>1.38</td>
                  <td colspan="2">7.89</td>
                  <td>15.91</td>
                  <td>10.33</td>
                  <td>83.12</td>
                  <td>83.12</td>
                  <td>83.12</td>
                </tr>
                <tr valign="top">
                  <td>GPT-J</td>
                  <td>13.22</td>
                  <td>29.99</td>
                  <td>17.88</td>
                  <td>3.37</td>
                  <td>
                    <italic>7.96</italic>
                  </td>
                  <td>4.59</td>
                  <td colspan="2">10.71</td>
                  <td>24.34</td>
                  <td>14.47</td>
                  <td>86.28</td>
                  <td>86.28</td>
                  <td>86.28</td>
                </tr>
                <tr valign="top">
                  <td>MentalLlama</td>
                  <td>
                    <italic>33.03</italic>
                  </td>
                  <td>32.79</td>
                  <td>
                    <italic>30.86</italic>
                  </td>
                  <td>
                    <italic>8.66</italic>
                  </td>
                  <td>6.50</td>
                  <td>
                    <italic>7.28</italic>
                  </td>
                  <td colspan="2">
                    <italic>27.73</italic>
                  </td>
                  <td>
                    <italic>27.30</italic>
                  </td>
                  <td>
                    <italic>29.55</italic>
                  </td>
                  <td>
                    <italic>89.40</italic>
                  </td>
                  <td>
                    <italic>90.99</italic>
                  </td>
                  <td>
                    <italic>90.99</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Mistral</td>
                  <td>29.07</td>
                  <td>26.56</td>
                  <td>25.41</td>
                  <td>7.03</td>
                  <td>5.20</td>
                  <td>7.19</td>
                  <td colspan="2">25.45</td>
                  <td>25.61</td>
                  <td>26.62</td>
                  <td>83.42</td>
                  <td>85.96</td>
                  <td>83.05</td>
                </tr>
                <tr valign="top">
                  <td>Llama-2</td>
                  <td>28.49</td>
                  <td>24.17</td>
                  <td>23.47</td>
                  <td>6.40</td>
                  <td>4.68</td>
                  <td>6.63</td>
                  <td colspan="2">22.7</td>
                  <td>23.04</td>
                  <td>23.66</td>
                  <td>82.86</td>
                  <td>83.80</td>
                  <td>81.62</td>
                </tr>
                <tr valign="top">
                  <td>Phi-2</td>
                  <td>21.23</td>
                  <td>10.42</td>
                  <td>13.81</td>
                  <td>1.89</td>
                  <td>1.43</td>
                  <td>1.78</td>
                  <td colspan="2">14.56</td>
                  <td>9.19</td>
                  <td>11.26</td>
                  <td>84.25</td>
                  <td>82.00</td>
                  <td>83.11</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>ROUGE: Recall-Oriented Understudy for Gisting Evaluation.</p>
              </fn>
              <fn id="table3fn2">
                <p><sup>b</sup>BERTScore: Bidirectional Encoder Representations from Transformers Score.</p>
              </fn>
              <fn id="table3fn3">
                <p><sup>c</sup>BART: Bidirectional and Auto-Regressive Transformer.</p>
              </fn>
              <fn id="table3fn4">
                <p><sup>d</sup>T5: Text-To-Text Transfer Transformer.</p>
              </fn>
              <fn id="table3fn5">
                <p><sup>e</sup>The best results are italicized.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>PD Summarization</title>
          <p>The experimental results presented in <xref ref-type="table" rid="table4">Table 4</xref> focus on the summarization task for the PD psychotherapy element. Considering the ROUGE-1 metric, MentalLlama demonstrated superior performance compared to the other LLMs. MentalLlama achieved an <italic>F</italic><sub>1</sub>-score of 30.95, followed by MentalBART (with an <italic>F</italic><sub>1</sub>-score of 29.94). For the ROUGE-2 metric, GPT-J outperformed the other models, followed by MentalLlama. In addition, in terms of the ROUGE-L metric, the top 2 models with the highest <italic>F</italic><sub>1</sub>-score values were MentalLlama and Mistral. Finally, MentalBART superseded the other models with an <italic>F</italic><sub>1</sub>-score of 88.61 with respect to the BERTScore metric. Overall, the scores indicate that LLMs such as MentalLlama and MentalBART, which were pretrained on the mental domain data, show consistent superiority. Notably, the base Mistral model also performed comparably to, and sometimes better than, the models trained on the mental health domain data.</p>
          <table-wrap position="float" id="table4">
            <label>Table 4</label>
            <caption>
              <p>Results obtained on Mental Health Counseling-Component–Guided Dialogue Summaries (MentalCLOUDS) for the summarization task on the patient discovery psychotherapy element.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="120"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <col width="0"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <col width="0"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <col width="0"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <thead>
                <tr valign="top">
                  <td>Model</td>
                  <td colspan="4">ROUGE<sup>a</sup>-1</td>
                  <td colspan="4">ROUGE-2</td>
                  <td colspan="4">ROUGE-L</td>
                  <td colspan="3">BERTScore<sup>b</sup></td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                  <td colspan="2">Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                  <td colspan="2">Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                  <td colspan="2">Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>BART<sup>c</sup></td>
                  <td>20.82</td>
                  <td>43.24</td>
                  <td>26.72</td>
                  <td colspan="2">5.97</td>
                  <td>12.93</td>
                  <td>7.74</td>
                  <td colspan="2">16.38</td>
                  <td>34.82</td>
                  <td>21.14</td>
                  <td colspan="2">87.35</td>
                  <td>87.35</td>
                  <td>87.35</td>
                </tr>
                <tr valign="top">
                  <td>T5<sup>d</sup></td>
                  <td>9.43</td>
                  <td>47.29</td>
                  <td>15.34</td>
                  <td colspan="2">3.03</td>
                  <td>16.90</td>
                  <td>5.01</td>
                  <td colspan="2">8.39</td>
                  <td>42.58</td>
                  <td>13.67</td>
                  <td colspan="2">84.77</td>
                  <td>84.77</td>
                  <td>84.77</td>
                </tr>
                <tr valign="top">
                  <td>MentalBART</td>
                  <td>33.51</td>
                  <td>29.94</td>
                  <td>29.94</td>
                  <td colspan="2">9.36</td>
                  <td>7.94</td>
                  <td>8.06</td>
                  <td colspan="2">23.39</td>
                  <td>21.44</td>
                  <td>21.10</td>
                  <td colspan="2">
                    <italic>88.61</italic>
                    <sup>e</sup>
                  </td>
                  <td>88.61</td>
                  <td>
                    <italic>88.61</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Flan-T5</td>
                  <td>21.08</td>
                  <td>35.61</td>
                  <td>24.44</td>
                  <td colspan="2">4.81</td>
                  <td>8.89</td>
                  <td>5.63</td>
                  <td colspan="2">16.13</td>
                  <td>28.29</td>
                  <td>18.94</td>
                  <td colspan="2">86.52</td>
                  <td>86.52</td>
                  <td>86.52</td>
                </tr>
                <tr valign="top">
                  <td>GPT-2</td>
                  <td>13.66</td>
                  <td>36.24</td>
                  <td>19.57</td>
                  <td colspan="2">4.08</td>
                  <td>11.27</td>
                  <td>5.94</td>
                  <td colspan="2">10.93</td>
                  <td>29.42</td>
                  <td>15.70</td>
                  <td colspan="2">85.21</td>
                  <td>85.21</td>
                  <td>85.21</td>
                </tr>
                <tr valign="top">
                  <td>GPT-Neo</td>
                  <td>12.96</td>
                  <td>29.93</td>
                  <td>17.83</td>
                  <td colspan="2">2.32</td>
                  <td>5.44</td>
                  <td>3.22</td>
                  <td colspan="2">9.84</td>
                  <td>23.10</td>
                  <td>13.60</td>
                  <td colspan="2">82.72</td>
                  <td>82.72</td>
                  <td>82.72</td>
                </tr>
                <tr valign="top">
                  <td>GPT-J</td>
                  <td>19.78</td>
                  <td>
                    <italic>53.33</italic>
                  </td>
                  <td>28.85</td>
                  <td colspan="2">
                    <italic>12.68</italic>
                  </td>
                  <td>
                    <italic>35.71</italic>
                  </td>
                  <td>
                    <italic>18.71</italic>
                  </td>
                  <td colspan="2">16.12</td>
                  <td>
                    <italic>43.33</italic>
                  </td>
                  <td>23.49</td>
                  <td colspan="2">86.43</td>
                  <td>86.43</td>
                  <td>86.43</td>
                </tr>
                <tr valign="top">
                  <td>MentalLlama</td>
                  <td>
                    <italic>24.56</italic>
                  </td>
                  <td>43.84</td>
                  <td>
                    <italic>30.95</italic>
                  </td>
                  <td colspan="2">9.55</td>
                  <td>26.01</td>
                  <td>12.79</td>
                  <td colspan="2">
                    <italic>23.77</italic>
                  </td>
                  <td>38.98</td>
                  <td>
                    <italic>29.17</italic>
                  </td>
                  <td colspan="2">84.63</td>
                  <td>
                    <italic>88.95</italic>
                  </td>
                  <td>86.68</td>
                </tr>
                <tr valign="top">
                  <td>Mistral</td>
                  <td>22.84</td>
                  <td>39.02</td>
                  <td>27.54</td>
                  <td colspan="2">8.78</td>
                  <td>25.79</td>
                  <td>11.35</td>
                  <td colspan="2">21.90</td>
                  <td>35.98</td>
                  <td>24.02</td>
                  <td colspan="2">86.62</td>
                  <td>87.28</td>
                  <td>84.49</td>
                </tr>
                <tr valign="top">
                  <td>Llama-2</td>
                  <td>20.22</td>
                  <td>34.7</td>
                  <td>26.1</td>
                  <td colspan="2">8.41</td>
                  <td>21.13</td>
                  <td>10.39</td>
                  <td colspan="2">14.73</td>
                  <td>21.44</td>
                  <td>17.79</td>
                  <td colspan="2">78.81</td>
                  <td>88.06</td>
                  <td>81.48</td>
                </tr>
                <tr valign="top">
                  <td>Phi-2</td>
                  <td>18.72</td>
                  <td>9.23</td>
                  <td>12.45</td>
                  <td colspan="2">5.61</td>
                  <td>4.44</td>
                  <td>4.96</td>
                  <td colspan="2">13.94</td>
                  <td>8.73</td>
                  <td>10.98</td>
                  <td colspan="2">84.25</td>
                  <td>82.00</td>
                  <td>80.05</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table4fn1">
                <p><sup>a</sup>ROUGE: Recall-Oriented Understudy for Gisting Evaluation.</p>
              </fn>
              <fn id="table4fn2">
                <p><sup>b</sup>BERTScore: Bidirectional Encoder Representations from Transformers Score.</p>
              </fn>
              <fn id="table4fn3">
                <p><sup>c</sup>BART: Bidirectional and Auto-Regressive Transformer.</p>
              </fn>
              <fn id="table4fn4">
                <p><sup>d</sup>T5: Text-To-Text Transfer Transformer.</p>
              </fn>
              <fn id="table4fn5">
                <p><sup>e</sup>The best results are italicized.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Reflecting</title>
          <p><xref ref-type="table" rid="table5">Table 5</xref> reports the automatic evaluation scores on the summarization task for the reflecting psychotherapy element. In terms of the ROUGE-1 metric, MentalLlama and Mistral were the best 2 models, with <italic>F</italic><sub>1</sub>-score values of 39.52 and 38.33, respectively. Similarly, MentalLlama demonstrated its superiority over the other LLMs in terms of the ROUGE-2, ROUGE-L, and BERTScore metrics. Moreover, the scores of the summarization tasks for this psychotherapy element were analogous to those of the previous 2 summarization tasks, namely SH and PD, wherein the mental health–specific LLMs exhibited their superiority over the other LLMs.</p>
          <table-wrap position="float" id="table5">
            <label>Table 5</label>
            <caption>
              <p>Results obtained on Mental Health Counseling-Component–Guided Dialogue Summaries (MentalCLOUDS) for the summarization task on the reflecting psychotherapy element.</p>
            </caption>
            <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
              <col width="110"/>
              <col width="90"/>
              <col width="70"/>
              <col width="70"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <col width="90"/>
              <col width="60"/>
              <col width="70"/>
              <thead>
                <tr valign="top">
                  <td>Model</td>
                  <td colspan="3">ROUGE<sup>a</sup>-1</td>
                  <td colspan="3">ROUGE-2</td>
                  <td colspan="3">ROUGE-L</td>
                  <td colspan="3">BERTScore<sup>b</sup></td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                  <td>Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                  <td>Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                  <td>Precision</td>
                  <td>Recall</td>
                  <td><italic>F</italic><sub>1</sub>-score</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>BART<sup>c</sup></td>
                  <td>17.01</td>
                  <td>23.04</td>
                  <td>18.08</td>
                  <td>2.87</td>
                  <td>4.25</td>
                  <td>3.22</td>
                  <td>12.68</td>
                  <td>17.79</td>
                  <td>13.66</td>
                  <td>85.26</td>
                  <td>85.26</td>
                  <td>85.26</td>
                </tr>
                <tr valign="top">
                  <td>T5<sup>d</sup></td>
                  <td>34.13</td>
                  <td>19.32</td>
                  <td>24.31</td>
                  <td>7.21</td>
                  <td>3.97</td>
                  <td>5.04</td>
                  <td>22.95</td>
                  <td>12.82</td>
                  <td>16.21</td>
                  <td>84.92</td>
                  <td>84.92</td>
                  <td>84.92</td>
                </tr>
                <tr valign="top">
                  <td>MentalBART</td>
                  <td>
                    <italic>34.99</italic>
                    <sup>e</sup>
                  </td>
                  <td>36.54</td>
                  <td>34.46</td>
                  <td>
                    <italic>10.24</italic>
                  </td>
                  <td>10.66</td>
                  <td>10.07</td>
                  <td>24.52</td>
                  <td>25.80</td>
                  <td>24.25</td>
                  <td>
                    <italic>88.70</italic>
                  </td>
                  <td>
                    <italic>88.70</italic>
                  </td>
                  <td>
                    <italic>88.70</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Flan-T5</td>
                  <td>25.10</td>
                  <td>41.40</td>
                  <td>30.15</td>
                  <td>7.19</td>
                  <td>12.03</td>
                  <td>8.64</td>
                  <td>18.52</td>
                  <td>31.00</td>
                  <td>22.36</td>
                  <td>87.41</td>
                  <td>87.41</td>
                  <td>87.41</td>
                </tr>
                <tr valign="top">
                  <td>GPT-2</td>
                  <td>2.84</td>
                  <td>7.54</td>
                  <td>4.08</td>
                  <td>0.14</td>
                  <td>0.33</td>
                  <td>0.20</td>
                  <td>2.35</td>
                  <td>6.34</td>
                  <td>3.39</td>
                  <td>82.66</td>
                  <td>82.66</td>
                  <td>82.66</td>
                </tr>
                <tr valign="top">
                  <td>GPT-Neo</td>
                  <td>1.14</td>
                  <td>3.97</td>
                  <td>1.74</td>
                  <td>0.00</td>
                  <td>0.00</td>
                  <td>0.00</td>
                  <td>1.14</td>
                  <td>3.97</td>
                  <td>1.74</td>
                  <td>80.88</td>
                  <td>80.88</td>
                  <td>80.88</td>
                </tr>
                <tr valign="top">
                  <td>GPT-J</td>
                  <td>17.60</td>
                  <td>38.33</td>
                  <td>23.71</td>
                  <td>5.07</td>
                  <td>
                    <italic>13.04</italic>
                  </td>
                  <td>7.13</td>
                  <td>14.98</td>
                  <td>32.85</td>
                  <td>20.18</td>
                  <td>86.94</td>
                  <td>86.94</td>
                  <td>86.94</td>
                </tr>
                <tr valign="top">
                  <td>MentalLlama</td>
                  <td>31.68</td>
                  <td>
                    <italic>54.76</italic>
                  </td>
                  <td>
                    <italic>39.52</italic>
                  </td>
                  <td>8.26</td>
                  <td>11.99</td>
                  <td>
                    <italic>10.17</italic>
                  </td>
                  <td>
                    <italic>27.13</italic>
                  </td>
                  <td>
                    <italic>37.59</italic>
                  </td>
                  <td>
                    <italic>26.56</italic>
                  </td>
                  <td>84.77</td>
                  <td>86.92</td>
                  <td>87.43</td>
                </tr>
                <tr valign="top">
                  <td>Mistral</td>
                  <td>29.15</td>
                  <td>49.28</td>
                  <td>38.33</td>
                  <td>8.42</td>
                  <td>11.87</td>
                  <td>8.34</td>
                  <td>24.41</td>
                  <td>34.20</td>
                  <td>23.44</td>
                  <td>78.83</td>
                  <td>79.97</td>
                  <td>84.81</td>
                </tr>
                <tr valign="top">
                  <td>Llama-2</td>
                  <td>26.93</td>
                  <td>43.81</td>
                  <td>31.22</td>
                  <td>6.10</td>
                  <td>9.23</td>
                  <td>8.24</td>
                  <td>16.82</td>
                  <td>20.67</td>
                  <td>16.21</td>
                  <td>78.93</td>
                  <td>86.05</td>
                  <td>82.19</td>
                </tr>
                <tr valign="top">
                  <td>Phi-2</td>
                  <td>10.61</td>
                  <td>5.21</td>
                  <td>6.91</td>
                  <td>0.94</td>
                  <td>0.71</td>
                  <td>0.89</td>
                  <td>7.28</td>
                  <td>4.60</td>
                  <td>5.53</td>
                  <td>86.94</td>
                  <td>82.17</td>
                  <td>84.49</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table5fn1">
                <p><sup>a</sup>ROUGE: Recall-Oriented Understudy for Gisting Evaluation.</p>
              </fn>
              <fn id="table5fn2">
                <p><sup>b</sup>BERTScore: Bidirectional Encoder Representations from Transformers Score.</p>
              </fn>
              <fn id="table5fn3">
                <p><sup>c</sup>BART: Bidirectional and Auto-Regressive Transformer.</p>
              </fn>
              <fn id="table5fn4">
                <p><sup>d</sup>T5: Text-To-Text Transfer Transformer.</p>
              </fn>
              <fn id="table5fn5">
                <p><sup>e</sup>The best results are italicized.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Qualitative Assessment by Experts</title>
        <sec>
          <title>Expert Panel Composition and Evaluation Framework</title>
          <p>To conduct a comprehensive expert assessment, 5 health care professionals were employed to assess the clinical appropriateness of the summaries produced by the LLMs based on the evaluation framework postulated by Sekhon et al [<xref ref-type="bibr" rid="ref69">69</xref>]. Of the 5 health care professionals, 2 (40%) were clinical psychologists and 3 (60%) were psychiatrists and medical practitioners; 4 (80%) were male and 1 (20%) was female; and their ages ranged from 40 to 55 years. Furthermore, each health care professional possessed more than a decade of therapeutic experience.</p>
          <p>The evaluation framework encompasses 6 crucial parameters: affective attitude, burden, ethicality, coherence, opportunity costs, and perceived effectiveness. The experts evaluated each session summary against these acceptability parameters, assigning continuous ratings on a scale ranging from 0 to 2, where a higher rating signified enhanced acceptability. In addition, we incorporated a new parameter: the extent of hallucination. It is categorical: 0=<italic>extensive hallucination observed</italic>, 1=<italic>minimal hallucination observed</italic>, and 2=<italic>no hallucination observed</italic>. These evaluative dimensions are defined in <xref ref-type="table" rid="table6">Table 6</xref>.</p>
          <p><xref ref-type="table" rid="table7">Table 7</xref> reports the clinical experts’ scores averaged over their ratings. The clinical acceptability framework [<xref ref-type="bibr" rid="ref69">69</xref>] involves 6 parameters: affective attitude, burden, ethicality, coherence, opportunity costs, and perceived effectiveness (refer to <xref ref-type="table" rid="table6">Table 6</xref> for more details). We selected the 3 best LLMs (MentalLlama, Mistral, and MentalBART) for the expert evaluation based on the automatic evaluation results. Notably, Mistral outperformed the other 2 LLMs across all metrics, although the other 2 LLMs were fine-tuned on mental health domain data. Overall, all raters were more aligned in rating the MentalBART model with less variance than the other 2 LLMs across all metrics. However, all 3 LLMs were rated higher on the surface-level–characteristic metric (burden) or subjective metric (affective attitude) than the opportunity costs and efficacy metrics (perceived effectiveness). The poor scores of all 3 models on the more sensitive aspects, that is, the overall efficacy and the opportunity costs, indicate that these models share the same weakness and are not suitable for clinical use as they stand now.</p>
          <table-wrap position="float" id="table6">
            <label>Table 6</label>
            <caption>
              <p>Explanation of the experts’ evaluation metrics based on the evaluation framework postulated by Sekhon et al [<xref ref-type="bibr" rid="ref69">69</xref>].</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="160"/>
              <col width="280"/>
              <col width="560"/>
              <thead>
                <tr valign="top">
                  <td>Construct</td>
                  <td>Definition</td>
                  <td>Application</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Affective attitude</td>
                  <td>How an individual feels about an intervention</td>
                  <td>What are your perceptions of the summarization based upon your clinical knowledge?</td>
                </tr>
                <tr valign="top">
                  <td>Burden</td>
                  <td>Perceived amount of effort required to participate</td>
                  <td>How much effort is required to understand the summarization (consider spelling, grammar, and overall interpretation)?</td>
                </tr>
                <tr valign="top">
                  <td>Ethicality</td>
                  <td>Extent to which this is a good fit with your organization’s value system</td>
                  <td>How does this align with your respective code of ethics? Are there concerns?</td>
                </tr>
                <tr valign="top">
                  <td>Coherence</td>
                  <td>Extent to which the intervention is understood</td>
                  <td>How well the summaries are understood</td>
                </tr>
                <tr valign="top">
                  <td>Opportunity costs</td>
                  <td>The extent to which one would benefit from using this intervention</td>
                  <td>Pros and cons of using this intervention in your respective setting</td>
                </tr>
                <tr valign="top">
                  <td>Perceived effectiveness</td>
                  <td>Extent to which this intervention will perform in the intended setting</td>
                  <td>How well this will perform in your clinical setting</td>
                </tr>
                <tr valign="top">
                  <td>Extent of hallucination</td>
                  <td>Extent to which this intervention is hallucinated</td>
                  <td>The generated text is incorrect, nonsensical, or contains global information apart from the context of the conversation</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <table-wrap position="float" id="table7">
            <label>Table 7</label>
            <caption>
              <p>Qualitative evaluation by human experts, with scores averaged from the 5 expert raters. The variances among the raters’ scores are also shown.</p>
            </caption>
            <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
              <col width="30"/>
              <col width="130"/>
              <col width="130"/>
              <col width="100"/>
              <col width="100"/>
              <col width="180"/>
              <col width="150"/>
              <col width="180"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Model</td>
                  <td>Affective attitude</td>
                  <td>Burden</td>
                  <td>Ethicality</td>
                  <td>Intervention coherence</td>
                  <td>Opportunity costs</td>
                  <td>Perceived effectiveness</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="8">
                    <bold>Mistral</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Values, mean (SD)</td>
                  <td>
                    <italic>1.12 (0.47)</italic>
                    <sup>a</sup>
                  </td>
                  <td>
                    <italic>1.33 (0.32)</italic>
                  </td>
                  <td>
                    <italic>1.42 (0.37)</italic>
                  </td>
                  <td>
                    <italic>1.13 (0.45)</italic>
                  </td>
                  <td>
                    <italic>0.98 (0.47)</italic>
                  </td>
                  <td>
                    <italic>0.90 (0.51)</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Variance</td>
                  <td>0.22</td>
                  <td>0.10</td>
                  <td>0.14</td>
                  <td>0.20</td>
                  <td>0.22</td>
                  <td>0.26</td>
                </tr>
                <tr valign="top">
                  <td colspan="8">
                    <bold>MentalLlama</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Values, mean (SD)</td>
                  <td><italic>1.12 (</italic>0.37<italic>)</italic></td>
                  <td><italic>1.33 (</italic>0.22<italic>)</italic></td>
                  <td>1.36 (0.32)</td>
                  <td>1.06 (0.36)</td>
                  <td>0.94 (0.39)</td>
                  <td>0.88 (0.45)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Variance</td>
                  <td>0.14</td>
                  <td>0.05</td>
                  <td>0.10</td>
                  <td>0.13</td>
                  <td>0.15</td>
                  <td>0.20</td>
                </tr>
                <tr valign="top">
                  <td colspan="8">
                    <bold>MentalBART</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Values, mean (SD)</td>
                  <td>0.95 (0.28)</td>
                  <td>1.28 (0.14)</td>
                  <td>1.33 (0.36)</td>
                  <td>1.01 (0.22)</td>
                  <td>0.84 (0.33)</td>
                  <td>0.76 (0.4)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Variance</td>
                  <td>0.08</td>
                  <td>0.02</td>
                  <td>0.13</td>
                  <td>0.05</td>
                  <td>0.11</td>
                  <td>0.16</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table7fn1">
                <p><sup>a</sup>The best results are italicized.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Extent of Hallucination</title>
          <p>The evaluation of hallucination identification in a set of 39 conversations was divided into 3 hallucination levels: <italic>no hallucination observed</italic>, <italic>minimal hallucination observed</italic>, and <italic>extensive hallucination observed</italic>. These categories essentially determine how well the response is consistent with the context and whether it is also incorrect, nonsensical, or contains global information beyond the scope of the conversation. The results are summarized in <xref ref-type="table" rid="table8">Table 8</xref>. The data show fluctuations in how the phenomenon of hallucination is perceived among different models and stress the importance of reviewing evaluations from numerous appraisers for a complete assessment. Here, we report the average hallucination-level frequencies rated by the 5 evaluators. Subsequently, we provide the percentage of the hallucination-level frequency against the total 39 instances. Of the 39 test conversations, the majority of cases (76%), on average, demonstrated <italic>no hallucination observed</italic>: Mistral and MentalBART achieved rates of 75% and 76%, respectively, while MentalLlama showed a slightly higher value: 77%. Among the samples where <italic>minimal hallucination observed</italic> was reported, all 3 models fell within a similar range: Mistral and MentalLlama had rates of 13% and 14%, respectively, while MentalBART showed a slightly elevated value of 18%. Notably, the models exhibited lower rates in terms of the <italic>extensive hallucination observed</italic> category, with Mistral at only 11%, MentalLlama at 7%, and MentalBART at 5%. These data confirm the capability of these AI models to faithfully follow whenever there is no hallucination and underscore their ability to detect more subtle degrees of hallucination across the various tasks on which they were tested.</p>
          <p>The results are consistently adequate across all 3 models, with a relatively equal distribution of the level of hallucination observed by different raters. Importantly, all 3 models exhibited a significant number of cases with <italic>no hallucination observed</italic>, indicating reliable performance and implying their ability to maintain fidelity to the original content.</p>
          <table-wrap position="float" id="table8">
            <label>Table 8</label>
            <caption>
              <p>Hallucination-level frequency marked by experts for the top 3 large language models. The average of hallucination-level frequencies for each rater is reported.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="310"/>
              <col width="230"/>
              <col width="230"/>
              <col width="230"/>
              <thead>
                <tr valign="top">
                  <td>Hallucination level</td>
                  <td>Mistral (%), mean (SD)</td>
                  <td>MentalLlama (%), mean (SD)</td>
                  <td>MentalBART (%), mean (SD)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>No hallucination observed</td>
                  <td>29.3 (1.64)</td>
                  <td>30.3 (2.03)</td>
                  <td>29.7 (1.58)</td>
                </tr>
                <tr valign="top">
                  <td>Minimal hallucination observed</td>
                  <td>5.1 (0.51)</td>
                  <td>5.6 (1.07)</td>
                  <td>7.3 (0.96)</td>
                </tr>
                <tr valign="top">
                  <td>Extensive hallucination observed</td>
                  <td>4.3 (1.34)</td>
                  <td>3 (1)</td>
                  <td>2 (0.67)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we assessed 11 state-of-the-art LLMs on the aspect-based summarization task of mental health therapy conversations. These therapy conversations are long, and it requires a good amount of effort to gain insights from reading them. To address this, we summarized these long conversations, thereby reducing the efforts of the experts. We further proposed MentalCLOUDS, which provides aspect-based summaries of each conversation.</p>
        <p>Specifically, we benchmarked the 11 LLMs for aspect-based summarization and evaluated them using both automatic and human evaluation approaches. The automatic evaluation scores revealed the superiority of the LLMs trained on mental health domain data. Two domain-specific LLMs, MentalLlama and MentalBART, consistently outperformed the rest of the LLMs across all aspects. Notably, although Mistral is not specifically trained on mental health domain data, its scores are comparable to those of MentalLlama, the overall best-performing model.</p>
        <p>This work also showcased the prowess of decoder-only LLMs compared to strong encoder-decoder–based LLMs. Typically, encoder-decoder models favor sequence-to-sequence tasks such as summarization, where a sequence of input texts is mapped to a sequence of output texts. However, the decoder-based models, that is, MentalLlama and Mistral, consistently outperformed the encoder-decoder models such as BART, T5, and Flan-T5. The only exception was MentalBART because it is fine-tuned on the mental health data set.</p>
        <p>The counseling data set was curated from multiple multimedia web-based sources such as YouTube transcripts [<xref ref-type="bibr" rid="ref47">47</xref>]. Hence, most of these natural conversations are incoherent and grammatically unfluent. Even with these imperfections, the LLMs were mostly able to construct meaningful summaries that contained coherent narratives with a clear beginning and end. However, the models did not do as well with the structure separation of the information. The SH, PD, and reflection sections frequently overlapped, posing clinical and legal problems. History is considered clinically sacrosanct and should not be contaminated by the therapist’s interpretation, and it is also citable in legal cases as client evidence, while interpretations are not. The models were also unable to identify psychotherapy types (eg, cognitive behavioral therapy) and therapy techniques, which form an integral part of counseling notes; for example, when participants are engaged in using a motivational interviewing framework, the essential processes and their outcomes, which a human summarizer would have recorded, failed to find a place in the LLM summaries. Important negative histories gathered during the session, such as the history of suicide risk or substance use, were also not recorded; and in at least 1 instance, the presence of suicide risk was not identified. In general, the models exhibited stronger performance in handling medical histories and examinations but struggled when faced with more technical and sensitive aspects, such as conversations related to actual therapeutic strategies.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>It is crucial to address the limitations of this study for a comprehensive understanding. First, this work aimed to benchmark the efficacy of only 11 LLMs on the aspect-based summarization task. Second, for faster and easier reproduction of the results, we did not assess models larger than 7 billion parameters; however, such models can be part of future examinations. Third, for the initial study and to promote research in this field, only open-source models were assessed in this work. However, inspecting closed models such as ChatGPT, Claude, and Gemini can be an interesting future research avenue. Finally, this work explored only 3 aspects (counseling components) of the conversation. However, conversations are subjective and can have &#62;3 components. In addition, the counseling sessions in this work represented a certain demographic region (American) and thus may not apply to therapy counseling for other demographics.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Our study benchmarked the efficacy and role of LLMs in counseling-component–guided summarization tasks. In doing so, we introduced a new data set, MentalCLOUDS, which comprises summaries corresponding to 3 counseling components. The experimental results confirmed the superiority of the LLMs fine-tuned on mental health domain data (MentalLlama and MentalBART) over the out-of-the-box LLMs. Notably, the out-of-the-box Mistral model seemed comparable to, and sometimes better than, the LLMs fine-tuned on mental health domain data. However, as per the experts’ evaluation, these LLMs often failed to distinguish between the counseling components during summary generation. Overall, these models excelled in managing medical histories and examinations but faced challenges with technical and sensitive aspects, such as therapy conversations, thereby limiting their clinical utility as they stand now.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BART</term>
          <def>
            <p>Bidirectional and Auto-Regressive Transformer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BERTScore</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers Score</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MEMO</term>
          <def>
            <p>Mental Health Summarization</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MentalCLOUDS</term>
          <def>
            <p>Mental Health Counseling-Component–Guided Dialogue Summaries</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">PD</term>
          <def>
            <p>patient discovery</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">PICO</term>
          <def>
            <p>Population, Intervention, Comparison, and Outcomes</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">ROUGE</term>
          <def>
            <p>Recall-Oriented Understudy for Gisting Evaluation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SH</term>
          <def>
            <p>symptom and history</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SLM</term>
          <def>
            <p>small language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">T5</term>
          <def>
            <p>Text-to-Text Transfer Transformer</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank JMIR Publications for providing article processing fee support. The salary of the support staff involved in the project was partly supported by Tower Research Capital Markets toward using machine learning for social good.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated and analyzed during this study are available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Verma</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A query-based medical information summarization system using ontology knowledge</article-title>
          <source>Proceedings of the 19th IEEE Symposium on Computer-Based Medical Systems</source>
          <year>2006</year>
          <conf-name>CBMS'06</conf-name>
          <conf-date>June 22-23, 2006</conf-date>
          <conf-loc>Salt Lake City, UT</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cbms.2006.25</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Bansal</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Fast abstractive summarization with reinforce-selected sentence rewriting</article-title>
          <source>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2018</year>
          <conf-name>ACL 2018</conf-name>
          <conf-date>July 15-20, 2018</conf-date>
          <conf-loc>Melbourne, Australia</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/p18-1063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Narayan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Lapata</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Don’t give me the details, just the summary! Topic-aware convolutional neural networks for extreme summarization</article-title>
          <source>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2018</year>
          <conf-name>EMNLP 2018</conf-name>
          <conf-date>October 31-November 4, 2018</conf-date>
          <conf-loc>Brussels, Belgium</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/d18-1206</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moratanch</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chitrakala</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A survey on extractive text summarization</article-title>
          <source>Proceedings of the International Conference on Computer, Communication and Signal Processing</source>
          <year>2017</year>
          <conf-name>ICCCSP 2017</conf-name>
          <conf-date>January 10-11, 2017</conf-date>
          <conf-loc>Chennai, India</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icccsp.2017.7944061</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Abstractive summarization: an overview of the state of the art</article-title>
          <source>Expert Syst Appl</source>
          <year>2019</year>
          <month>05</month>
          <volume>121</volume>
          <fpage>49</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2018.12.011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tuggener</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mieskes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deriu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cieliebak</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Are we summarizing the right way? A survey of dialogue summarization data sets</article-title>
          <source>Proceedings of the Third Workshop on New Frontiers in Summarization</source>
          <year>2021</year>
          <conf-name>NewSum 2021</conf-name>
          <conf-date>November 10, 2021</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2021.newsum-1.12</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Konovalov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Scotch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Post</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Brandt</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Biomedical informatics techniques for processing and analyzing web blogs of military service members</article-title>
          <source>J Med Internet Res</source>
          <year>2010</year>
          <month>10</month>
          <day>05</day>
          <volume>12</volume>
          <issue>4</issue>
          <fpage>e45</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2010/4/e45/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1538</pub-id>
          <pub-id pub-id-type="medline">20923755</pub-id>
          <pub-id pub-id-type="pii">v12i4e45</pub-id>
          <pub-id pub-id-type="pmcid">PMC3234168</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Strauss</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Peguero</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Hirst</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Machine learning methods for clinical forms analysis in mental health</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2013</year>
          <volume>192</volume>
          <fpage>1024</fpage>
          <pub-id pub-id-type="medline">23920798</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kennedy</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>McIntyre</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Tourjman</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Bhat</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Blier</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hasnain</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jollant</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Levitt</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>MacQueen</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>McInerney</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>McIntosh</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Milev</surname>
              <given-names>RV</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Parikh</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Ravindran</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Uher</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Canadian network for mood and anxiety treatments (CANMAT) 2016 clinical guidelines for the management of adults with major depressive disorder: section 3. Pharmacological treatments</article-title>
          <source>Can J Psychiatry</source>
          <year>2016</year>
          <month>09</month>
          <volume>61</volume>
          <issue>9</issue>
          <fpage>540</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27486148"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0706743716659417</pub-id>
          <pub-id pub-id-type="medline">27486148</pub-id>
          <pub-id pub-id-type="pii">0706743716659417</pub-id>
          <pub-id pub-id-type="pmcid">PMC4994790</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kavuluru</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Predicting mental conditions based on "history of present illness" in psychiatric notes with deep neural networks</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <month>11</month>
          <volume>75S</volume>
          <fpage>S138</fpage>
          <lpage>48</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30133-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.06.010</pub-id>
          <pub-id pub-id-type="medline">28606869</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30133-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC5705423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>YP</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Modified bidirectional encoder representations from transformers extractive summarization model for hospital information systems based on character-level tokens (AlphaBERT): development and performance evaluation</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>04</month>
          <day>29</day>
          <volume>8</volume>
          <issue>4</issue>
          <fpage>e17787</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/4/e17787/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17787</pub-id>
          <pub-id pub-id-type="medline">32347806</pub-id>
          <pub-id pub-id-type="pii">v8i4e17787</pub-id>
          <pub-id pub-id-type="pmcid">PMC7221648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2019</year>
          <conf-name>NAACL 2019</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ive</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Viani</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Verma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Puntis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cardinal</surname>
              <given-names>RN</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Generation and evaluation of artificial mental health records for Natural Language Processing</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <month>05</month>
          <day>14</day>
          <volume>3</volume>
          <fpage>69</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-0267-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-0267-x</pub-id>
          <pub-id pub-id-type="medline">32435697</pub-id>
          <pub-id pub-id-type="pii">267</pub-id>
          <pub-id pub-id-type="pmcid">PMC7224173</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alam</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Malik</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Malik</surname>
              <given-names>GM</given-names>
            </name>
          </person-group>
          <article-title>Clinical context-aware biomedical text summarization using deep neural network: model development and validation</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>23</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e19810</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e19810/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19810</pub-id>
          <pub-id pub-id-type="medline">33095174</pub-id>
          <pub-id pub-id-type="pii">v22i10e19810</pub-id>
          <pub-id pub-id-type="pmcid">PMC7647812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manas</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Aribandi</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kursuncu</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Alambo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shalin</surname>
              <given-names>VL</given-names>
            </name>
            <name name-style="western">
              <surname>Thirunarayan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Beich</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Narasimhan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sheth</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Knowledge-infused abstractive summarization of clinical diagnostic interviews: framework development study</article-title>
          <source>JMIR Ment Health</source>
          <year>2021</year>
          <month>05</month>
          <day>10</day>
          <volume>8</volume>
          <issue>5</issue>
          <fpage>e20865</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2021/5/e20865/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/20865</pub-id>
          <pub-id pub-id-type="medline">33970116</pub-id>
          <pub-id pub-id-type="pii">v8i5e20865</pub-id>
          <pub-id pub-id-type="pmcid">PMC8145083</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Negrinho</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ghosh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jagannathan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Hassanzadeh</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Schaaf</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gormley</surname>
              <given-names>MR</given-names>
            </name>
          </person-group>
          <article-title>Leveraging pretrained models for automatic summarization of doctor-patient conversations</article-title>
          <source>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2021</year>
          <conf-name>EMNLP 2021</conf-name>
          <conf-date>November 7-11, 2021</conf-date>
          <conf-loc>Virtual event; Punta Cana, Dominican Republic</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2021.findings-emnlp.313</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zafari</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zulkernine</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>ChatSum: an intelligent medical chat summarization tool</article-title>
          <source>Proceedings of the 2021 IEEE EMBS International Conference on Biomedical and Health Informatics</source>
          <year>2021</year>
          <conf-name>BHI 2021</conf-name>
          <conf-date>July 27-30, 2021</conf-date>
          <conf-loc>Athens, Greece</conf-loc>
          <pub-id pub-id-type="doi">10.1109/bhi50953.2021.9508585</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nallapati</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gülçehre</surname>
              <given-names>Ç</given-names>
            </name>
            <name name-style="western">
              <surname>dos Santos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Abstractive text summarization using sequence-to-sequence RNNs and beyond</article-title>
          <source>Proceedings of the 20th SIGNLL Conference on Computational Natural Language Learning</source>
          <year>2016</year>
          <conf-name>CoNLL 2016</conf-name>
          <conf-date>August 11-12, 2016</conf-date>
          <conf-loc>Berlin, Germany</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/k16-1028</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on June 12, 2017</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1706.03762"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>See</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Get to the point: summarization with pointer-generator networks</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on April 14, 2017</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1704.04368"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p17-1099</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>MI</given-names>
            </name>
          </person-group>
          <article-title>Latent dirichlet allocation</article-title>
          <source>J Mach Learn Res</source>
          <year>2003</year>
          <volume>3</volume>
          <fpage>993</fpage>
          <lpage>1022</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/944919.944937"/>
          </comment>
          <pub-id pub-id-type="doi">10.5555/944919.944937</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Summarizing medical conversations via identifying important utterances</article-title>
          <source>Proceedings of the 28th International Conference on Computational Linguistics</source>
          <year>2020</year>
          <conf-name>COLING 2020</conf-name>
          <conf-date>December 8-13, 2020</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.coling-main.63</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Quiroz</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Laranjo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kocaballi</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Briatore</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Berkovsky</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rezazadegan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Identifying relevant information in medical conversations to summarize a clinician-patient encounter</article-title>
          <source>Health Informatics J</source>
          <year>2020</year>
          <month>12</month>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>2906</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/1460458220951719?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1460458220951719</pub-id>
          <pub-id pub-id-type="medline">32865113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krishna</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Khosla</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bigham</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Lipton</surname>
              <given-names>ZC</given-names>
            </name>
          </person-group>
          <article-title>Generating SOAP notes from doctor-patient conversations using modular summarization techniques</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 4, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2005.01795"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.384</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Titov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A joint model of text and aspect ratings for sentiment summarization</article-title>
          <source>Proceedings of the 46th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2008</year>
          <conf-name>ACL-08: HLT</conf-name>
          <conf-date>June 15, 2008</conf-date>
          <conf-loc>Columbus, OH</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sundaresan</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Rated aspect summarization of short comments</article-title>
          <source>Proceedings of the 18th International Conference on World Wide Web</source>
          <year>2009</year>
          <conf-name>WWW '09</conf-name>
          <conf-date>April 20-24, 2009</conf-date>
          <conf-loc>Madrid, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1526709.1526728</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Qu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Aspect and sentiment aware abstractive review summarization</article-title>
          <source>Proceedings of the 27th International Conference on Computational Linguistics</source>
          <year>2018</year>
          <conf-name>COLING 2018</conf-name>
          <conf-date>August 20-26, 2018</conf-date>
          <conf-loc>Santa Fe, NM</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/C18-1095"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3269206.3269273</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Neural network-based abstract generation for opinions and arguments</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on June 9, 2016</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1606.02785"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/n16-1007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frermann</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Klementiev</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Inducing document structure for aspect-based summarization</article-title>
          <source>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2019</year>
          <conf-name>ACL 2019</conf-name>
          <conf-date>July 28-August 2, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/p19-1630</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krishna</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Srinivasan</surname>
              <given-names>BV</given-names>
            </name>
          </person-group>
          <article-title>Generating topic-oriented summaries using neural attention</article-title>
          <source>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2018</year>
          <conf-name>NAACL-HLT 2018</conf-name>
          <conf-date>June 1-6, 2018</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/n18-1153</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hayashi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Budania</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ackerson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Neervannan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Neubig</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>WikiAsp: a dataset for multi-domain aspect-based summarization</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on November 16, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2011.07832"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00362</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Petzold</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>OASum: large-scale open domain aspect-based summarization</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on December 19, 2022</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2212.09233"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2023.findings-acl.268</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beltagy</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Longformer: the long-document transformer</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on April 10, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2004.05150"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2004.05150</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Katariya</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Amatriain</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kannan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Dr. Summarize: global summarization of medical dialogue by exploiting local structures</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on September 18, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2009.08666"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.findings-emnlp.335</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aw</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>NF</given-names>
            </name>
          </person-group>
          <article-title>Topic-aware pointer-generator networks for summarizing spoken conversations</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on October 3, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1910.01335"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/asru46091.2019.9003764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kazi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kahanda</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Automatically generating psychiatric case notes from digital transcripts of doctor-patient conversations</article-title>
          <source>Proceedings of the 2nd Clinical Natural Language Processing Workshop</source>
          <year>2019</year>
          <conf-name>ClinicalNLP 2019</conf-name>
          <conf-date>June 7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w19-1918</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gundogdu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pamuksuz</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Telleria</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Customized impression prediction from radiology reports using BERT and LSTMs</article-title>
          <source>IEEE Trans Artif Intell</source>
          <year>2023</year>
          <month>8</month>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>744</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1109/tai.2021.3086435</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Let topic flow: a unified topic-guided segment-wise dialogue summarization framework</article-title>
          <source>IEEE/ACM Trans Audio Speech Lang Process</source>
          <year>2024</year>
          <volume>32</volume>
          <fpage>2021</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1109/taslp.2024.3374112</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Novel framework for dialogue summarization based on factual-statement fusion and dialogue segmentation</article-title>
          <source>PLoS One</source>
          <year>2024</year>
          <month>4</month>
          <day>16</day>
          <volume>19</volume>
          <issue>4</issue>
          <fpage>e0302104</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0302104"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0302104</pub-id>
          <pub-id pub-id-type="medline">38625864</pub-id>
          <pub-id pub-id-type="pii">PONE-D-23-36128</pub-id>
          <pub-id pub-id-type="pmcid">PMC11020369</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chintagunta</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Katariya</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Amatriain</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kannan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Medically aware GPT-3 as a data generator for medical dialogue summarization</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on September 9, 2021</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2110.07356"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2021.nlpmc-1.9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ryder</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Subbiah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dhariwal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Neelakantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shyam</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sastry</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Askell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Herbert-Voss</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krueger</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Henighan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Child</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ramesh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ziegler</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Winter</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sigler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Litwin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chess</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McCandlish</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Amodei</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Language models are few-shot learners</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on May 28, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2005.14165"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ju</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Exploring the potential of ChatGPT in medical dialogue summarization: a study on consistency with human preferences</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2024</year>
          <month>03</month>
          <day>14</day>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>75</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-024-02481-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-024-02481-8</pub-id>
          <pub-id pub-id-type="medline">38486198</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-024-02481-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10938713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ghazvininejad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mohamed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on October 29, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1910.13461"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Fine-tune BERT for extractive summarization</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on March 25, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1903.10318"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Veen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Van Uden</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Blankemeier</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Delbrouck</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Aali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bluethgen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pareek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Polacin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Reis</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Seehofnerová</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rohatgi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hosamani</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ahuja</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Hom</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gatidis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pauly</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhari</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Adapted large language models can outperform medical experts in clinical text summarization</article-title>
          <source>Nat Med</source>
          <year>2024</year>
          <month>04</month>
          <day>27</day>
          <volume>30</volume>
          <issue>4</issue>
          <fpage>1134</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-024-02855-5</pub-id>
          <pub-id pub-id-type="medline">38413730</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-024-02855-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mutalik</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Middleton</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>Extracting and summarizing evidence of suicidal ideation in social media contents using large language models</article-title>
          <source>Proceedings of the 9th Workshop on Computational Linguistics and Clinical Psychology</source>
          <year>2024</year>
          <conf-name>CLPsych 2024</conf-name>
          <conf-date>March 21, 2024</conf-date>
          <conf-loc>St. Julians, Malta</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Srivastava</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Suresh</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lord</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Akhtar</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Chakraborty</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Counseling summarization using mental health knowledge guided utterance filtering</article-title>
          <source>Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining</source>
          <year>2022</year>
          <conf-name>KDD '22</conf-name>
          <conf-date>August 14-18, 2022</conf-date>
          <conf-loc>Washington, DC</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3534678.3539187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raffel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Narang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Matena</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Exploring the limits of transfer learning with a unified text-to-text transformer</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on October 23, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1910.10683"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Barzilay</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Nutri-bullets: summarizing health studies by composing segments</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on March 22, 2021</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2103.11921"/>
          </comment>
          <pub-id pub-id-type="doi">10.1609/aaai.v35i15.17624</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JW</given-names>
            </name>
          </person-group>
          <article-title>Continual BERT: continual learning for adaptive extractive summarization of COVID-19 literature</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on July 27, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2007.03405"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2007.03405</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Barzilay</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Nutri-bullets hybrid: consensual multi-document summarization</article-title>
          <source>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2021</year>
          <conf-name>NAACL 2021</conf-name>
          <conf-date>June 6-11, 2021</conf-date>
          <conf-loc>Virtual event</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2021.naacl-main.411</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Soboczenski</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>IJ</given-names>
            </name>
          </person-group>
          <article-title>Generating (factual?) narrative summaries of RCTs: experiments with neural multi-document summarization</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2021</year>
          <volume>2021</volume>
          <fpage>605</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34457176"/>
          </comment>
          <pub-id pub-id-type="medline">34457176</pub-id>
          <pub-id pub-id-type="pii">3476779</pub-id>
          <pub-id pub-id-type="pmcid">PMC8378607</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Leveraging summary guidance on medical report summarization</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2023</year>
          <month>10</month>
          <volume>27</volume>
          <issue>10</issue>
          <fpage>5066</fpage>
          <lpage>75</lpage>
          <pub-id pub-id-type="doi">10.1109/jbhi.2023.3304376</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Learning to summarize Chinese radiology findings with a pre-trained encoder</article-title>
          <source>IEEE Trans Biomed Eng</source>
          <year>2023</year>
          <month>12</month>
          <volume>70</volume>
          <issue>12</issue>
          <fpage>3277</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="doi">10.1109/tbme.2023.3280987</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Veen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Van Uden</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Attias</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pareek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bluethgen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Polacin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Delbrouck</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Chaves</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhari</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Pauly</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>RadAdapt: radiology report summarization via lightweight domain adaptation of large language models</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on May 2, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2305.01146"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2023.bionlp-1.42</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jo</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>A scoping review of consumer needs for cancer information</article-title>
          <source>Patient Educ Couns</source>
          <year>2019</year>
          <month>07</month>
          <volume>102</volume>
          <issue>7</issue>
          <fpage>1237</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pec.2019.02.004</pub-id>
          <pub-id pub-id-type="medline">30772114</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(18)30718-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Finney Rutten</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Blake</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Greenberg-Worisek</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Moser</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>BW</given-names>
            </name>
          </person-group>
          <article-title>Online health information seeking among US adults: measuring progress toward a healthy people 2020 objective</article-title>
          <source>Public Health Rep</source>
          <year>2019</year>
          <volume>134</volume>
          <issue>6</issue>
          <fpage>617</fpage>
          <lpage>25</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31513756"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0033354919874074</pub-id>
          <pub-id pub-id-type="medline">31513756</pub-id>
          <pub-id pub-id-type="pmcid">PMC6832079</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mrini</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dernoncourt</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bui</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Farcas</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nakashole</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>A gradually soft multi-task and data-augmented approach to medical question understanding</article-title>
          <source>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing</source>
          <year>2021</year>
          <conf-name>ACL/IJCNLP 2021</conf-name>
          <conf-date>August 1-6, 2021</conf-date>
          <conf-loc>Virtual Event</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.119</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Interactive use of online health resources: a comparison of consumer and professional questions</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <month>07</month>
          <volume>23</volume>
          <issue>4</issue>
          <fpage>802</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27147494"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw024</pub-id>
          <pub-id pub-id-type="medline">27147494</pub-id>
          <pub-id pub-id-type="pii">ocw024</pub-id>
          <pub-id pub-id-type="pmcid">PMC4926747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kolhatkar</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Paranjape</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gokhale</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Kadam</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Team converge at ProbSum 2023: abstractive text summarization of patient progress notes</article-title>
          <source>Proceedings of the 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks</source>
          <year>2023</year>
          <conf-name>BioNLP@ACL 2023</conf-name>
          <conf-date>July 13, 2023</conf-date>
          <conf-loc>Toronto, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2023.bionlp-1.50</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>DeakinNLP at ProbSum 2023: clinical progress note summarization with rules and language models</article-title>
          <source>Proceedings of the 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks</source>
          <year>2023</year>
          <conf-name>BioNLP@ACL 2023</conf-name>
          <conf-date>July 13, 2023</conf-date>
          <conf-loc>Toronto, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2023.bionlp-1.47</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dligach</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Churpek</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Afshar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Overview of the problem list summarization (ProbSum) 2023 shared task on summarizing patients' active diagnoses and problems from electronic health record progress notes</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on June 8, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2306.05270"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2023.bionlp-1.43</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dligach</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Churpek</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Afshar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Summarizing patients' problems from hospital progress notes using pre-trained sequence-to-sequence models</article-title>
          <source>Proc Int Conf Comput Ling</source>
          <year>2022</year>
          <month>10</month>
          <volume>2022</volume>
          <fpage>2979</fpage>
          <lpage>91</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36268128"/>
          </comment>
          <pub-id pub-id-type="medline">36268128</pub-id>
          <pub-id pub-id-type="pmcid">PMC9581107</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shing</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Shivade</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pourdamghani</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Resnik</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Oard</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bhatia</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Towards clinical encounter summarization: learning to compose discharge summaries from prior notes</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on April 27, 2021</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2104.13498"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2104.13498</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ando</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Okumura</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Komachi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Horiguchi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Matsumoto</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Exploring optimal granularity for extractive summarization of unstructured health records: analysis of the largest multi-institutional archive of health records in Japan</article-title>
          <source>PLOS Digit Health</source>
          <year>2022</year>
          <month>09</month>
          <volume>1</volume>
          <issue>9</issue>
          <fpage>e0000099</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36812582"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000099</pub-id>
          <pub-id pub-id-type="medline">36812582</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-21-00099</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931252</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Automatic generation of discharge summary of EMRs based on multi-granularity information fusion</article-title>
          <source>Proceedings of the 9th China Health Information Processing Conference</source>
          <year>2023</year>
          <conf-name>CHIP 2023</conf-name>
          <conf-date>October 27-29, 2023</conf-date>
          <conf-loc>Hangzhou, China</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-981-99-9864-7_17</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lalwani</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Can you summarize my learnings? Towards perspective-based educational dialogue summarization</article-title>
          <source>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2023</year>
          <conf-name>EMNLP 2023</conf-name>
          <conf-date>December 6-10, 2023</conf-date>
          <conf-loc>Singapore, Singapore</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2023.findings-emnlp.208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bhattacharyya</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A shoulder to cry on: towards a motivational virtual assistant for assuaging mental agony</article-title>
          <source>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2022</year>
          <conf-name>NAACL-HLT 2022</conf-name>
          <conf-date>July 10-15, 2022</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2022.naacl-main.174</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sekhon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cartwright</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Francis</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <article-title>Acceptability of healthcare interventions: an overview of reviews and development of a theoretical framework</article-title>
          <source>BMC Health Serv Res</source>
          <year>2017</year>
          <month>01</month>
          <day>26</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>88</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-017-2031-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12913-017-2031-8</pub-id>
          <pub-id pub-id-type="medline">28126032</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12913-017-2031-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC5267473</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Poria</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Majumder</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mihalcea</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hovy</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Emotion recognition in conversation: research challenges, datasets, and recent advances</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 8, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1905.02947"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A survey on dialogue systems: recent advances and new frontiers</article-title>
          <source>ACM SIGKDD Explor Newsl</source>
          <year>2017</year>
          <month>11</month>
          <day>21</day>
          <volume>19</volume>
          <issue>2</issue>
          <fpage>25</fpage>
          <lpage>35</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/abs/10.1145/3166054.3166058"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3166054.3166058</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Child</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Luan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Amodei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Language models are unsupervised multitask learners</article-title>
          <source>OpenAI Blog</source>
          <year>2019</year>
          <volume>1</volume>
          <issue>8</issue>
          <fpage>9</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Black</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Biderman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hallahan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Anthony</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Golding</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Leahy</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McDonell</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Phang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pieler</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prashanth</surname>
              <given-names>US</given-names>
            </name>
            <name name-style="western">
              <surname>Purohit</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Reynolds</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tow</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Weinbach</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>GPT-NeoX-20B: an open-source autoregressive language model</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on April 14, 2022</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2204.06745"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2022.bigscience-1.9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Biderman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Golding</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hoppe</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Foster</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Phang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Thite</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nabeshima</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Presser</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Leahy</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>The pile: an 800GB dataset of diverse text for language modeling</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on December 31, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2101.00027"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2101.00027</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Linear attention mechanism: an efficient attention for semantic segmentation</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on July 29, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2007.14902"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mirhoseini</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Maziarz</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Outrageously large neural networks: the sparsely-gated mixture-of-experts layer</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on January 23, 2017</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1701.06538"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1701.06538</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kalchbrenner</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenborn</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Salimans</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Axial attention in multidimensional transformers</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on December 20, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1912.12180"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Mesh-transformer-JAX: model-parallel implementation of transformer language model with JAX</article-title>
          <source>GitHub</source>
          <year>2021</year>
          <access-date>2024-07-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/kingoflolz/mesh-transformer-jax">https://github.com/kingoflolz/mesh-transformer-jax</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Longpre</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zoph</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tay</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fedus</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Dehghani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brahma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Webson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Suzgun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chowdhery</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Castro-Ros</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pellat</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Valter</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Narang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mishra</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Petrov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Scaling instruction-finetuned language models</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on October 20, 2022</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2210.11416"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>AQ</given-names>
            </name>
            <name name-style="western">
              <surname>Sablayrolles</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mensch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bamford</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chaplot</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>de las Casas</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bressand</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lengyel</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lample</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Saulnier</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lavaud</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Lachaux</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Stock</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Scao</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Lavril</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lacroix</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sayed</surname>
              <given-names>WE</given-names>
            </name>
          </person-group>
          <article-title>Mistral 7B</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on October 10, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2310.06825"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2310.06825</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ainslie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee-Thorp</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>de Jong</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zemlyanskiy</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lebrón</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sanghai</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>GQA: training generalized multi-query transformer models from multi-head checkpoints</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on May 22, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2305.13245"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2023.emnlp-main.298</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kuang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>MentaLLaMA: interpretable mental health analysis on social media with large language models</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on September 24, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2309.13567"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3589334.3648137</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Touvron</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Albert</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Almahairi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Babaei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bashlykov</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Batra</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bhargava</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bhosale</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bikel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Blecher</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrer</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cucurull</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Esiobu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Fuller</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Goswami</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hartshorn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hosseini</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Inan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kardas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kerkez</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Khabsa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kloumann</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Korenev</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Koura</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Lachaux</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Lavril</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liskovich</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Martinet</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Mihaylov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mishra</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Molybog</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nie</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Poulton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Reizenstein</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rungta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Saladi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schelten</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>XE</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kuan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zarov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kambadur</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Narang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stojnic</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Edunov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Scialom</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Llama 2: open foundation and fine-tuned chat models</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on July 18, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2307.09288"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2307.09288</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ouyang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wainwright</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Mishkin</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Slama</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ray</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schulman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hilton</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kelton</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Simens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Askell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Welinder</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Christiano</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Leike</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lowe</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Training language models to follow instructions with human feedback</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on March 4, 2022</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2203.02155"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Christiano</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Leike</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Martic</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Legg</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Amodei</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Deep reinforcement learning from human preferences</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on June 12, 2017</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1706.03741"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1706.03741</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gunasekar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Aneja</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mendes</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Del Giorno</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gopi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Javaheripi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kauffmann</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>de Rosa</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Saarikivi</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Salim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Behl</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Bubeck</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Eldan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kalai</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>YT</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Textbooks are all you need</article-title>
          <source>arXiv. </source>
          <comment>Preprint posted online on June 20, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2306.11644"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CY</given-names>
            </name>
          </person-group>
          <article-title>ROUGE: a package for automatic evaluation of summaries</article-title>
          <source>Proceedings of the Workshop on Text Summarization Branches Out (WAS 2004)</source>
          <year>2004</year>
          <conf-name>WAS 2004</conf-name>
          <conf-date>July 25-26, 2004</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kishore</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberger</surname>
              <given-names>KQ</given-names>
            </name>
            <name name-style="western">
              <surname>Artzi</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>BERTScore: evaluating text generation with BERT</article-title>
          <source>International Conference on Learning Representations</source>
          <year>2020</year>
          <month>04</month>
          <day>26</day>
          <conf-name>International Conference on Learning Representations</conf-name>
          <conf-date>2020 April 26</conf-date>
          <conf-loc>Addis Ababa</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/forum?id=SkeHuCVFDr"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
