<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR AI</journal-id>
      <journal-title>JMIR AI</journal-title>
      <issn pub-type="epub">2817-1705</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v5i1e88816</article-id>
      <article-id pub-id-type="pmid">42398933</article-id>
      <article-id pub-id-type="doi">10.2196/88816</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Real-World Implementation of Large Language Models for Writing Clinical Discharge Summaries Within a Secure Data Environment: Development and Expert Evaluation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Prasser</surname>
            <given-names>Fabian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Pierce</surname>
            <given-names>Benjamin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Al Zoubi</surname>
            <given-names>Mohammad</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Carenzo</surname>
            <given-names>Catalina</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-5772-8029</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Goldsmith</surname>
            <given-names>Kathleen</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Imperial Clinical Analytics, Research and Evaluation (iCARE), NIHR Imperial Biomedical Research Centre</institution>
            <institution>Imperial College Healthcare NHS Trust</institution>
            <addr-line>1A Sheldon Square</addr-line>
            <addr-line>London, England, W2 6PY</addr-line>
            <country>United Kingdom</country>
            <phone>44 02075891000</phone>
            <email>kathleen.goldsmith17@imperial.ac.uk</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-0792-3253</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Arribas</surname>
            <given-names>Maite</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0182-3493</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Atkins</surname>
            <given-names>Benjamin</given-names>
          </name>
          <degrees>BSc, MBChB</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6883-2802</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Ko</surname>
            <given-names>Ina</given-names>
          </name>
          <degrees>BSc, MBBS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1412-9070</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Chong</surname>
            <given-names>Ho Lun</given-names>
          </name>
          <degrees>BSc, MBChB</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8313-1791</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Raja</surname>
            <given-names>Asmita</given-names>
          </name>
          <degrees>MBChB</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6223-5054</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Riad</surname>
            <given-names>Aya M</given-names>
          </name>
          <degrees>BMedSci, MBChB, MRCS(Ed)</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3296-9353</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Lear</surname>
            <given-names>Rachael</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8670-3799</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Abdullahi</surname>
            <given-names>Yusuf S</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-2463-1269</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Glampson</surname>
            <given-names>Ben</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5601-5581</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <name name-style="western">
            <surname>Orchard</surname>
            <given-names>Tim</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3358-663X</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author">
          <name name-style="western">
            <surname>Mayer</surname>
            <given-names>Erik</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5509-4580</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Imperial Clinical Analytics, Research and Evaluation (iCARE), NIHR Imperial Biomedical Research Centre</institution>
        <institution>Imperial College Healthcare NHS Trust</institution>
        <addr-line>London, England</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Surgery &#38; Cancer, Faculty of Medicine</institution>
        <institution>Imperial College London</institution>
        <addr-line>London, England</addr-line>
        <country>United Kingdom</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Kathleen Goldsmith <email>kathleen.goldsmith17@imperial.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>3</day>
        <month>7</month>
        <year>2026</year>
      </pub-date>
      <volume>5</volume>
      <elocation-id>e88816</elocation-id>
      <history>
        <date date-type="received">
          <day>2</day>
          <month>12</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>2</month>
          <year>2026</year>
        </date>
        <date date-type="rev-recd">
          <day>17</day>
          <month>3</month>
          <year>2026</year>
        </date>
        <date date-type="accepted">
          <day>11</day>
          <month>6</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Catalina Carenzo, Kathleen Goldsmith, Maite Arribas, Benjamin Atkins, Ina Ko, Ho Lun Chong, Asmita Raja, Aya M Riad, Rachael Lear, Yusuf S Abdullahi, Ben Glampson, Tim Orchard, Erik Mayer. Originally published in JMIR AI (https://ai.jmir.org), 03.07.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on https://www.ai.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://ai.jmir.org/2026/1/e88816" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>A discharge summary is a clinical report that documents a patient’s hospital stay, including test results, diagnoses, management, and follow-up. Currently, discharge summaries are written by clinicians who manually locate pertinent information across the electronic health record, of which approximately 80% is free text. This process is time-consuming and may be suitable for automation using large language models.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study developed a template-based prompting system that can produce clinically acceptable discharge summaries, specifically the “clinical summary” and “plan and requested actions” sections, from routinely collected electronic patient records.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study used electronic health record data from Imperial College Healthcare National Health Service Trust, a network of 5 hospitals in northwest London. It was conducted within the Imperial Secure Data Environment under Data Access and Ethics Committee approval. In total, 52 inpatient encounters were selected by the clinical team to ensure diversity in clinical specialty, reason for admission, complexity, length of stay, and sociodemographic characteristics; 83% (n=43) of the cases were allocated to the development dataset, and 17% (n=9) comprised the test dataset. The system synthesized clinical notes related to an inpatient hospital encounter and used structured template prompts with OpenAI’s generative pretrained transformer-4 to generate a discharge summary. The prompt was co-designed across 3 iterations. Resident physicians completed an evaluation form to assess the clinical acceptability of the generated summaries, including the primary outcome (global confidence rating) and secondary outcomes (accuracy, completeness, readability, formatting, sociodemographic bias, and potential clinical harm). Sensitivity analyses assessed the effect of length of stay and admission type (emergency department vs other and surgical vs other) on the primary outcome.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 52 patients (n=32, 62% female) were included, with a mean age of 44.8 (SD 27.1) years and an average length of stay of 15.2 (SD 21.1) days. In the test dataset, 89% (8/9) of generative pretrained transformer-4–generated summaries received a positive global confidence rating (“yes” or “yes, with minor changes”). Secondary outcomes were positive for the “clinical summary” section (8/9, 89% complete and 7/9, 78% accurate) and the “plan and requested actions” section (7/9, 78% complete and 7/9, 78% accurate). Readability, formatting, sociodemographic bias, and potential clinical harm also showed positive results in the test dataset. Sensitivity analyses showed no statistically significant variation in the primary outcome across length of stay or admission type (length of stay: <italic>P</italic>=.29; surgical admission: <italic>P</italic>=.99; emergency department admission: <italic>P</italic>=.15).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our results demonstrate the feasibility of the pipeline, but rigorous statistical evaluation in a larger, adequately powered sample is needed.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>AI</kwd>
        <kwd>machine learning</kwd>
        <kwd>natural language processing</kwd>
        <kwd>large language models</kwd>
        <kwd>generative pretrained transformer</kwd>
        <kwd>GPT</kwd>
        <kwd>clinical documentation</kwd>
        <kwd>discharge summaries</kwd>
        <kwd>electronic health records</kwd>
        <kwd>clinical decision support</kwd>
        <kwd>health information management</kwd>
        <kwd>data security</kwd>
        <kwd>expert validation study</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The discharge summary is a critical clinical document that communicates a patient’s care in hospital, key clinical events, and plans for follow-up care. It typically includes patient demographics, hospital information, diagnoses, a clinical summary, medications, allergies, and pathology results. Currently, the responsibility for compiling the discharge summary often falls on a single clinician. This presents several challenges. First, inpatient care is often delivered by a multidisciplinary team of health care professionals [<xref ref-type="bibr" rid="ref1">1</xref>], making it difficult for any single clinician to have complete oversight of all relevant clinical events and decisions. Second, the required information is dispersed across multiple documents within the electronic health record (EHR), including admission clerking notes, ward round entries, operation notes, and results (eg, pathology and radiology), making the process of locating and synthesizing key data time-consuming [<xref ref-type="bibr" rid="ref2">2</xref>]. Third, EHR data are predominantly unstructured, with approximately 80% existing as free text [<xref ref-type="bibr" rid="ref3">3</xref>], further complicating retrieval and summary generation. Finally, the volume of EHR data is continually increasing [<xref ref-type="bibr" rid="ref4">4</xref>], intensifying the burden on clinicians tasked with producing accurate and comprehensive discharge summaries under time pressure.</p>
      <p>Generative artificial intelligence (AI) refers to a class of AI that can create new content such as text [<xref ref-type="bibr" rid="ref5">5</xref>] based on patterns learned from large datasets. A specific type of generative AI is the large language model (LLM) [<xref ref-type="bibr" rid="ref6">6</xref>], which is trained to understand and generate humanlike text. In a clinical setting, LLMs have the potential to streamline documentation, automate routine tasks, and support decision-making within EHRs. A commonly used LLM is the generative pretrained transformer (GPT), which was first released by OpenAI in June 2018 [<xref ref-type="bibr" rid="ref7">7</xref>] and has been continuously optimized into newer versions, including GPT-4, which we used in this study [<xref ref-type="bibr" rid="ref8">8</xref>]. GPT has demonstrated the ability to generate humanlike text and can be adapted for specific tasks such as producing discharge summaries [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      <p>To leverage pretrained LLMs in clinical settings, prompt engineering has become increasingly central to research in developing clinical applications that minimize hallucination rate, preserve factual accuracy, and produce outputs for clinical use [<xref ref-type="bibr" rid="ref19">19</xref>]. Prompt engineering is the deliberate design of inputs to generative models to align the output with the intended clinical application without the need for retraining [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. A well-designed prompt provides structured and explicit cues to the model, facilitating accurate and efficient task completion. A range of different prompt patterns have been developed and applied in the context of text generation and summarization, and these techniques are increasingly being applied in clinical settings [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      <p>The use of LLMs to automate discharge summary generation has been examined using both synthetic or publicly available datasets [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref17">17</xref>] and real-world EHR data [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Among studies using real hospital data, several imposed constraints on the clinical cohort. Li et al [<xref ref-type="bibr" rid="ref9">9</xref>] and Kim et al [<xref ref-type="bibr" rid="ref12">12</xref>] restricted their populations to specific patient conditions [<xref ref-type="bibr" rid="ref18">18</xref>] or to settings such as the emergency department (ED) [<xref ref-type="bibr" rid="ref11">11</xref>] or narrowed inclusion by limiting the length of stay [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Ganzinger et al [<xref ref-type="bibr" rid="ref23">23</xref>] relied on structured data that had to be manually extracted from semistructured and unstructured EHRs to construct the prompt context, underscoring the challenges of working with heterogeneous clinical records.</p>
      <p>This study aimed to develop a template-based prompting system that can produce clinically acceptable discharge summaries, specifically the “clinical summary” and “plan and requested actions” sections, from unstructured, routinely collected electronic patient records. The system was iteratively co-designed by data scientists, engineers, and physicians. The system retrieved free-text clinical notes related to an inpatient hospital encounter and used structured template prompts to guide OpenAI’s GPT-4 in synthesizing the clinical documentation and generating a discharge summary that conformed to the UK Royal College of Physicians’ (London) 2021 guidelines [<xref ref-type="bibr" rid="ref26">26</xref>] and National Health Service England’s information standard DAPB4042 [<xref ref-type="bibr" rid="ref27">27</xref>]. The clinical acceptability of the generated summaries was evaluated using an assessment form completed by a clinical team of 4 resident physicians and included the primary outcome (physicians’ global confidence rating) and secondary outcomes (accuracy and completeness, clinical significance of incorrect or missing content, formatting, readability, and sociodemographic bias).</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Setting and Data Source</title>
        <p>This study was undertaken using EHR data from Imperial College Healthcare National Health Service Trust (ICHT), a large network of 5 hospitals providing acute and specialist care in northwest London serving over 1.3 million patients annually. The data were securely hosted in the Imperial Secure Data Environment (SDE), refreshed weekly since April 1, 2023. The Imperial SDE is approved to host identifiable and deidentified clinical and biomedical data. The ICHT instances in the SDE securely host routinely captured clinical information from the care delivered at ICHT, which includes clinical narratives that cover the full clinical record and patient pathways of all ICHT patients. The SDE was provisioned with an OpenAI model end point hosted on Microsoft Azure, with private network access made available to the data scientist from within the SDE. Azure Prompt flow was used to orchestrate prompt engineering, input data processing, and model interaction. The flow was configured to use OpenAI’s GPT-4 model (gpt-4-1106), which had a 128,000-token context window and a rate limit of 80,000 tokens per minute.</p>
        <p>All data used in this study were curated within the Imperial SDE and deidentified to remove any personally identifiable information, including any names and dates, before research access in line with the SDE processes. The ICHT data protection office and Caldicott guardian reviewed and approved the data and anonymization process. All infrastructure within the SDE was signed off by the ICHT security and data protection offices.</p>
      </sec>
      <sec>
        <title>Multidisciplinary Design Team Composition</title>
        <p>The design team comprised postgraduate year 1 and 2 (equivalent to UK Foundation Programme year 1 and 2) resident physicians at ICHT with routine, hands-on experience writing discharge summaries working alongside data scientists and data engineers to iteratively co-design the system.</p>
      </sec>
      <sec>
        <title>Service Evaluation</title>
        <p>A service evaluation was conducted (ICHT approval 906) where researchers and technical members of the team observed resident physicians completing discharge summaries. This provided insights into the types of documents accessed within the EHR; the order in which they were reviewed; the specific information extracted; and factors considered relevant when assessing a discharge summary, including accuracy and completeness. Documents that were identified as relevant for writing a discharge summary included admission clerking, ward round notes, specialty reviews, operation notes, and results.</p>
      </sec>
      <sec>
        <title>Sample Selection</title>
        <p>A total of 52 inpatient encounters, each corresponding to a unique patient, in which the resident physicians had provided direct care at ICHT between April 1, 2023, and December 5, 2023, were manually selected by the resident physicians (a unique patient encounter will be referred to as a “case” hereafter). The cases were chosen to ensure diversity in clinical specialty, reason for admission, complexity, length of stay, and sociodemographic characteristics. Additionally, cases were selected for which there was a discharge summary documented in the EHR.</p>
        <p>The 52 cases were first divided into 5 subsets, each containing a similar number of encounters and a comparable mean length of stay, which was used as a proxy for clinical case complexity. Four subsets (n=43, 83% of the encounters) were designated as the development dataset, whereas the fifth subset (n=9, 17% of the encounters) was held out as an independent test dataset. This test dataset was isolated prior to model development and was not accessed during prompt design, system development, or iterative testing. An infographic describing this process is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>A.</p>
        <p>After iteration 1, each physician selected the single best-performing case from their assigned development subset to serve as a few-shot example. This resulted in 4 examples that were incorporated into the few-shot learning prompt used in iterations 2 and above (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for details). Consequently, for iteration 2 onward, the development dataset size was reduced from 43 to 39 cases.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Infographic summary of the methods. (A) 52 cases divided into 5 groups matched on mean length of stay. One group served as the left-out test set (pink); the remaining 4 formed the development set (blue), each assigned to one resident physician for evaluation in iterations 1 and 2. All 4 physicians reviewed test cases. (B) Flowchart showing how clinical documents and GPT-generated discharge summaries were used by physicians to complete the evaluation form for each case. (C) Context preprocessing pipeline (see Methods section “Prompt-Templated Pipeline Development”). (D) Prompt engineering pipeline: prompts from iterations 1 and 2 generated summaries on the development set; the iteration 3 prompt was applied to the test dataset. EHR: electronic health record; RAG: retrieval-augmented generation.</p>
          </caption>
          <graphic xlink:href="ai_v5i1e88816_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Prompt-Templated Pipeline Development (3 Iterations)</title>
        <sec>
          <title>Data Preparation</title>
          <p>An overview of the pipeline, evaluation process, and iterative development is provided in <xref rid="figure1" ref-type="fig">Figures 1</xref>B-1D. For each selected case, all clinical documents associated with the encounter ID were selected and preprocessed. Data preprocessing removed HTML tags, special characters, and irrelevant sequences of words and sentences (eg, “Report starts/ends here”). The Python libraries Pandas, Beautiful Soup, and RegEx were used to facilitate this process.</p>
          <p>After iteration 1, some clinical notes were excluded from the selection. The service evaluation identified certain clinical notes as less directly informative for drafting a discharge summary, and so these were excluded from the knowledge base for all cases, with agreement from the 4 resident physicians. These included some notes labeled as nursing notes, board rounds, and other entries with specific tags or phrases. Additionally, the resident physicians and data scientists put together a list of all possible discharge summary–type documents, and this was used to remove all discharge summary–type documents from the corpus of clinical notes and hold them separately for use in the evaluation of the pipeline. For each patient encounter, cleaned clinical notes were put into the order in which they were created and then combined into one piece of text for use in the case-specific prompt.</p>
        </sec>
        <sec>
          <title>Data Chunking</title>
          <p>In some cases, the total number of tokens of the clinical notes combined with the prompt and output token limit exceeded the overall token limit for GPT-4, which is 128,000 tokens. For these documents, an additional Prompt flow program was used that divided the text into 5 smaller chunks, which overlapped by 1000 tokens. These chunks were then sequentially passed via a prompt to the GPT-4 model, where the prompt instructed it to remove duplicated pieces of information without changing the words or order of the original text. The output of these 5 separate prompts was then combined into a single document, and this document replaced the original document of all clinical notes for that case (<xref rid="figure1" ref-type="fig">Figure 1</xref>C).</p>
        </sec>
        <sec>
          <title>Indexing</title>
          <p>For each case, the resulting combined document was stored as a .txt file in an instance of Azure AI Search. To use the keyword search functionality at inference, each document had the patient identifier number entered at the start of the document. At inference, this patient identifier was used to search the index for the clinical notes relevant to that patient encounter.</p>
        </sec>
        <sec>
          <title>Prompt Engineering and Model Parameters</title>
          <p>The resident physicians and the data scientist co-designed the prompt. Using the development dataset, the system message and different prompting patterns were explored to understand how they affected the generated text, as outlined in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        </sec>
        <sec>
          <title>Content Moderation</title>
          <p>The default content moderation system applied to GPT-4 end points was blocking prompts containing language classified as violent, which led to refusals during inference. To resolve this, a custom content moderation configuration was deployed on the ICHT model end points.</p>
        </sec>
        <sec>
          <title>Evaluation During Development</title>
          <p>Each of the 4 subsets of cases from the development set was randomly assigned to a resident physician for evaluation during iterations 1 and 2. For each case, the physicians were provided with the clinical notes that had been used as inputs to the prompts and the GPT-generated summary.</p>
          <p>For each case, the physician completed an evaluation form in a Microsoft Excel sheet. The evaluation form covered a set of questions, including the primary and secondary outcomes. The primary outcome was defined as the physician’s global confidence rating (“Based on the quality of the discharge summary, would you be willing to sign your name to it?” Response options: “yes,” “no,” and “yes, with minor changes”). The secondary outcomes included accuracy and completeness, clinical significance of incorrect or missing content, formatting, readability, and sociodemographic bias (see Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for further details).</p>
        </sec>
      </sec>
      <sec>
        <title>Evaluation (Test Dataset)</title>
        <p>The final prompt developed from iteration 3 was used to generate discharge summaries for the 9 hold-out cases in the test dataset. These cases were randomly assigned to the resident physicians, with each physician reviewing at least 2 cases and completing an evaluation form for them.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>Due to the limited sample size (N=52) and power, statistical comparisons across iterations were not computed. Instead, the results from the evaluation form in iterations 1 and 2 (development dataset) and the final evaluation (test dataset) are visualized as stacked bar charts. The frequency of binary and categorical responses for the primary and secondary outcomes is reported.</p>
      </sec>
      <sec>
        <title>Sensitivity Analysis</title>
        <p>To examine whether length of stay (measured in days as a continuous variable), emergency department admission (yes or no), or surgical admission (yes or no) were associated with the primary outcome of global confidence rating (categorical: “yes,” “no,” and “yes, with minor changes”), we ran 3 separate Fisher-Freeman-Halton exact tests in R version 4.4.1. Statistical significance was assessed at the .05 level. For the sensitivity analysis, data from the development dataset (iteration 2) and test dataset (final evaluation) were pooled to maximize statistical power.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was undertaken within the Imperial SDE and received approval from the Data Access and Ethics Committee. The Imperial Clinical Analytics Research and Evaluation (iCARE) research database was given favorable ethics approval by the South West – Central Bristol Research Ethics Committee (reference 21/SW/0120; Integrated Research Application System project ID 282093). All data used in this paper were effectively anonymized before analysis. The systems and processes were reviewed and approved by the ICHT security and data protection offices.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Patient Sample</title>
        <p>A total of 52 patients were included in this study (<xref ref-type="table" rid="table1">Table 1</xref>), with a mean age of 44.8 (SD 27.1) years at admission. In total, 62% (n=32) of the sample were female, with the most common ethnicity being other (n=21, 40%), followed by White—British or Irish (n=12, 23%), White—other (n=8, 15%), Black (n=6, 12%), Asian (n&#60;5, &#60;10%), and mixed (n&#60;5, &#60;10%). The average length of hospital stay was 15.2 (SD 21.1) days. Most patients (n=38, 73%) were admitted via the ED, with a smaller proportion (n=15, 29%) being admitted via the surgical department. The distribution of patients’ <italic>International Classification of Diseases</italic> diagnoses and the specialties they received care from during their hospital encounter are shown in Tables S2 and S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Summary of the sociodemographic and clinical variables for the total sample (N=52).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="700"/>
            <col width="0"/>
            <col width="270"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristic</td>
                <td>Values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Age at admission (years), mean (SD)</bold>
                </td>
                <td>44.8 (27.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Missing</td>
                <td colspan="2">0 (0)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Sex assigned at birth, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td colspan="2">32 (62)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td colspan="2">20 (38)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intersex</td>
                <td colspan="2">0 (0)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Missing</td>
                <td colspan="2">0 (0)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Ethnicity, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Asian—any other Asian background</td>
                <td colspan="2">&#60;5 (&#60;10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Black or Black British—African or Caribbean</td>
                <td colspan="2">6 (12)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mixed—any other mixed background</td>
                <td colspan="2">&#60;5 (&#60;10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White—British or Irish</td>
                <td colspan="2">12 (23)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White—any other White background</td>
                <td colspan="2">8 (15)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td colspan="2">21 (40)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Missing</td>
                <td colspan="2">0 (0)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Length of stay (days), mean (SD)</bold>
                </td>
                <td>15.2 (21.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>ED<sup>a</sup> admission, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Yes</td>
                <td colspan="2">38 (73)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No</td>
                <td colspan="2">14 (27)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Missing</td>
                <td colspan="2">0 (0)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Surgery admission, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Yes</td>
                <td colspan="2">15 (29)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No</td>
                <td colspan="2">37 (71)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Missing</td>
                <td colspan="2">0 (0)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>ED: emergency department.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Primary Outcome</title>
        <p>The primary outcome, global confidence rating, across iterations (development dataset) and the final evaluation (test dataset) is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, with the 95% confidence intervals (CIs) for the proportions shown in <xref ref-type="table" rid="table2">Table 2</xref>. A greater proportion of GPT-generated discharge summaries received a “yes” or “yes, with minor changes” sign-off from resident physician reviewers than received a “no.” The proportion of negative responses (“no”) decreased from iteration 1 to iteration 2 (20/43, 47% to 7/39, 18%), suggesting that the prompt modifications between iterations 1 and 2 may have improved the quality of the generated summaries. However, it is important to note that the option “yes, with minor changes” was introduced only in iteration 2 alongside the original binary responses (“yes” or “no”). Moreover, evaluation on a held-out test dataset (with no data leakage) revealed a similar pattern, with a minority of GPT-generated discharge summaries (1/9, 11%) being declined sign-off by the reviewer.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Stacked bar chart showing the primary outcome (global confidence rating) across iterations (development dataset) and the final evaluation (test dataset). Green indicates “yes,” light orange indicates “yes, with minor changes,” and dark orange indicates “no.” The possible answers to the global confidence rating question in iteration 1 were “yes” and “no.” The option “yes, with minor changes” was introduced in iteration 2 and testing due to an understanding that a discharge summary tool in practice would allow for minor edits.</p>
          </caption>
          <graphic xlink:href="ai_v5i1e88816_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Proportion of observations for the primary outcome (global confidence rating) across iterations 1 and 2 (development dataset) and the final evaluation (test dataset).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="240"/>
            <col width="260"/>
            <col width="260"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Would you sign your name to this discharge summary?</td>
                <td>Iteration 1 (n=43 observations), n (%; 95% CI)</td>
                <td>Iteration 2 (n=39 observations), n (%; 95% CI)</td>
                <td>Test (n=9 observations), n (%; 95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>No</td>
                <td>20 (47; 33-61)</td>
                <td>7 (18; 9-33)</td>
                <td>1 (11; 2-43)</td>
              </tr>
              <tr valign="top">
                <td>Yes, with minor changes</td>
                <td>N/A<sup>a</sup></td>
                <td>11 (28; 17-44)</td>
                <td>5 (56; 27-81)</td>
              </tr>
              <tr valign="top">
                <td>Yes</td>
                <td>23 (53; 39-67)</td>
                <td>21 (54; 39-68)</td>
                <td>3 (33; 12-65)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Sensitivity Analyses</title>
        <p>No statistically significant association was observed between global confidence rating (“yes,” “yes, with minor changes,” or “no”) and any of the 3 variables examined (length of stay: <italic>P</italic>=.29; surgical admission: <italic>P</italic>=.99; emergency department admission: <italic>P</italic>=.15). These findings suggest no statistically significant variation in global confidence ratings across these patient variables (Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Secondary Outcomes</title>
        <p>Accuracy and completeness of the (1) “clinical summary” and (2) “plan and requested actions” sections of the GPT-generated discharge summaries are shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. For the “clinical summary” section, most responses across iterations 1 and 2 were positive (39/43, 91% and 37/39, 95% complete; 38/42, 90% and 35/37, 95% accurate). For the “plan and requested actions” section, most responses were also positive across development iterations (33/42, 79% and 31/39, 79% complete; 34/37, 92% and 29/39, 74% accurate). In the test dataset, ratings were consistent with those of iteration 2, with positive results for the “clinical summary” section (8/9, 89% complete and 7/9, 78% accurate) and the “plan and requested actions” section (7/9, 78% complete and 7/9, 78% accurate).</p>
        <p>Other secondary outcomes included the clinical significance of inaccurate or missing information, sociodemographic bias, formatting issues, and readability (<xref rid="figure4" ref-type="fig">Figure 4</xref>).</p>
        <p>For the clinical significance of inaccurate or missing information, there was a slight improvement from iteration 1 to 2, with a reduced proportion of ratings of high or moderate risk. In the test dataset, no summaries were rated as having a high risk, and most responses were rated as having no or minor risk (8/9, 89%). For sociodemographic bias, most responses across iterations 1 and 2 were positive (40/43, 93% and 36/39, 92% reported no bias); in the test dataset, this dropped to 78% (7/9). For formatting issues, most responses across both development iterations were positive (25/25, 100% and 30/35, 86% no formatting issues), with the same reflected in the test dataset (7/8, 87%). For readability, there was a considerable improvement from iteration 1 to 2 (19/43, 44% to 31/38, 79%), with all responses being “clear and concise” in the test dataset.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Stacked bar chart showing the accuracy and completeness of the (1) “clinical summary” and (2) “plan and requested actions” sections across iterations (development dataset) and the final evaluation (test dataset). Green indicates “yes,” light orange indicates “partially”—an option removed from iteration 2—and dark orange indicates “no.” The lighter colors correspond to the test dataset.</p>
          </caption>
          <graphic xlink:href="ai_v5i1e88816_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Stacked bar charts showing formatting issues (A), the clinical significance of inaccurate or missing information (B), sociodemographic bias (C), and readability (D) across 2 development iterations and the final evaluation (test dataset).</p>
          </caption>
          <graphic xlink:href="ai_v5i1e88816_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>We present the development of a prompt-templated pipeline for generating discharge summaries using OpenAI’s GPT-4 model [<xref ref-type="bibr" rid="ref8">8</xref>] from routinely collected patient EHRs. From our prompt engineering iterative process, our final prompt used examples of high-quality input-output pairs for few-shot learning. The utility and clinical safety of the generated discharge summaries were evaluated by resident physicians. Our results demonstrate the potential effectiveness of this system, with evaluations by resident physicians showing a majority (8/9, 89%) of positive ratings on the primary outcome (global confidence rating), as well as on secondary outcomes (readability, formatting, sociodemographic bias, and potential clinical harm). However, rigorous statistical evaluation in a larger, adequately powered sample is needed [<xref ref-type="bibr" rid="ref23">23</xref>]. As discussed below, this study presents a series of innovations relative to the work previously done in this field.</p>
      <p>First, by harnessing the Imperial SDE [<xref ref-type="bibr" rid="ref28">28</xref>], the data used to generate discharge summaries in this study came from unstructured, real-world EHRs. This constitutes a significant advancement in the field, which has largely been limited to synthetic or publicly available datasets or structured data extracted from the EHR [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Moreover, the prompt was designed and refined by a multidisciplinary team of resident physicians, data engineers, and data scientists through an iterative approach informed by the Royal College of Physicians discharge summary guidelines [<xref ref-type="bibr" rid="ref27">27</xref>]. The physicians’ domain expertise and active involvement ensured that the system was not only performant but also aligned with the practical realities of clinical workflows.</p>
      <p>Second, although the final evaluation was limited to a small sample (n=9), we found potentially promising results for the clinical acceptability of the generated outputs, aligning with previous research [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. For example, 100% (9/9) of our discharge summaries were rated as clear and concise relative to lower ratings in previous studies (eg, Cai et al [<xref ref-type="bibr" rid="ref29">29</xref>] observed that only 39% of the summaries obtained a full score on readability). The high readability score could be attributed to aspects of our pipeline, such as the low value of the top_p parameter, which reduces the use of excessive text decorations and ensures that the summary remains concise and focused on the relevant medical information. Moreover, the prompt specified that medical acronyms in the text should be translated to their expanded term following the Unified Medical Language System, which could reduce the ambiguity of medical acronyms [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
      <p>Third, in addition to the prompt design, we showed that the selection of input data for the prompt template is important and can affect the quality of the discharge summary output. This was previously shown by Hartman et al [<xref ref-type="bibr" rid="ref31">31</xref>], where progress notes and consults were the primary source of content selected for generating a summary of a patient’s hospital stay, with other documents being removed. We carried out a service evaluation with clinical experts to identify the key inputs needed for effective discharge summary generation. These included admission clerking, ward round notes, specialty reviews, operation notes, and diagnostic results (eg, pathology and radiology). Due to a proportion of the clinical notes in the EHR being of unknown type, such as blank notes (a text file without any preformatting or title), clinical notes were filtered by excluding those deemed irrelevant for drafting a discharge summary, such as nursing notes and board round notes. By using this cleaner set of input documents in iteration 2, improvements were observed in the discharge summary evaluation for the primary outcome compared to iteration 1 (28% more responses were positive for the global confidence rating), as well as some secondary outcomes (eg, the “clinical summary” section improved by 4% on completeness and 5% on accuracy, and readability improved by 37%). However, it is worth noting that these improvements may also be attributed to any changes to the prompt across iterations.</p>
      <p>This study presents several avenues for future research. First, the evaluation of LLM-generated outputs can be further expanded. For example, for a proportion of cases in both iteration 2 and the test dataset (9/37, 24%; 3/9, 33%; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), resident physicians stated that they needed to consult with the team that treated the patient due to insufficient data in the clinical notes. This corresponds with the work by Ando et al [<xref ref-type="bibr" rid="ref24">24</xref>], who found that 39% of the discharge summary information was sourced externally to the EHR, with up to 11% derived from the clinician’s memory. To further understand the need for a human in the loop, additional metrics such as gap distance could be introduced in the evaluation form, as suggested by Cai et al [<xref ref-type="bibr" rid="ref29">29</xref>]. In addition to qualitative evaluation, quantitative methods such as the Recall-Oriented Understudy for Gisting Evaluation score [<xref ref-type="bibr" rid="ref32">32</xref>], bidirectional encoder representations from transformers score [<xref ref-type="bibr" rid="ref33">33</xref>], and Flesch-Kincaid readability ease [<xref ref-type="bibr" rid="ref34">34</xref>] provide an approach for standardized evaluation of LLM-generated outputs.</p>
      <p>To move toward the safe implementation of our pipeline into a real-world clinical setting, several challenges, such as model bias and fairness [<xref ref-type="bibr" rid="ref35">35</xref>], would need to be addressed, as outlined in previous studies [<xref ref-type="bibr" rid="ref36">36</xref>]. Moreover, a better understanding of the clinical scenarios where the tool could be implemented successfully is required. Case complexity often results in a higher number of documents, which poses a challenge due to the limited effective context window of LLMs [<xref ref-type="bibr" rid="ref37">37</xref>]. To address this, future research should explore methods to improve the selective preprocessing of input data. Research could also stratify cases by factors indicative of case complexity, such as treatment specialty, care transitions, or length of stay. We explored some of these in our sensitivity analysis, but a larger sample would be necessary to inform data-driven clinical guidelines.</p>
      <p>Safe implementation in practice would also require careful attention to data privacy, transparency, and model governance. In this study, data were deidentified, and the model was deployed through Azure OpenAI within a managed virtual network inside the Imperial SDE, providing network isolation and controlled access. Prompts and outputs were processed only for inference and were not used to train the underlying foundation models. If implemented in routine clinical practice, similar technical safeguards would need to be complemented by transparent documentation and clear governance around proprietary model updates. As proprietary LLMs such as the GPT series are typically subject to vendor-driven version changes, mechanisms for version control, auditability, and ongoing performance monitoring would be necessary to ensure that model updates or changes in clinical data environments do not adversely affect reliability, reproducibility, or patient safety.</p>
      <p>There are several limitations worth considering. First, missing data present a challenge for LLMs [<xref ref-type="bibr" rid="ref24">24</xref>]. Task completion is rarely documented by physicians in EHRs, leaving the model to infer which actions require follow-up in discharge plans, often resulting in inaccuracies. Similarly, some remote specialist or senior input, such as phone consultations, is frequently underdocumented or even omitted. Moreover, due to anonymization preprocessing in our study, key details such as time stamps and locations were removed. This hindered the model’s ability to sequence clinical events chronologically. However, this issue would be mitigated in a real-world clinical setting, where the data are not anonymized. Second, different specialties often use inconsistent formats, and plans can sometimes conflict, such as between renal and cardiology teams. In some cases, the model struggled to generate unified discharge plans. Moreover, medical abbreviations can be ambiguous, and their meaning may vary based on context and specialty (eg, “PT” could refer to physiotherapy, prothrombin time, or patient). Directly integrating the model with a terminology or abbreviation database such as the Unified Medical Language System could help improve its interpretative accuracy. Third, given that general practitioners and patients are the key readers of discharge summaries, their evaluation of the GPT-generated summaries would also be valuable. Fourth, the small sample of discharge summaries limits our ability to formally assess the clinical acceptability of the generated summaries or potential sources of bias. In addition, cases were manually selected by clinicians who had directly cared for the patients; therefore, the sample was not representative of the broader patient population and may be subject to selection bias. For the purposes of this study, which primarily aimed to codevelop an effective prompt, resident physicians were asked to select cases with a range of lengths of stay to capture varying levels of clinical complexity. In addition, interrater reliability was not assessed because each case was evaluated by a single physician. Future work should evaluate a larger set of generated discharge summaries across a representative sample and include assessment of interrater reliability and whether specific patient, clinical, or contextual factors systematically influence the quality. However, this would be very resource intensive due to the time-consuming aspect of discharge summary evaluation. For example, in our study, clinician evaluation took approximately 10 minutes per case. As proposed by Gero et al [<xref ref-type="bibr" rid="ref38">38</xref>], LLM self-verification tools could be used to show the source of the input data used in the discharge summary generation and, thus, shorten the time required for physicians to review the input notes.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Prompt engineering methods and model parameters, the evaluation assessment form, sample International Classification of Diseases, Tenth Revision diagnoses and hospital specialties for the cohort, proportions and 95% CIs for secondary outcomes across iterations, and a figure of primary outcomes stratified by length of stay, surgical admission, and emergency department admission.</p>
        <media xlink:href="ai_v5i1e88816_app1.docx" xlink:title="DOCX File, 180 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CI</term>
          <def>
            <p>confidence interval</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ED</term>
          <def>
            <p>emergency department</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">GPT</term>
          <def>
            <p>generative pretrained transformer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ICHT</term>
          <def>
            <p>Imperial College Healthcare National Health Service Trust</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SDE</term>
          <def>
            <p>Secure Data Environment</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>Generative artificial intelligence tools, including Claude, were used to suggest language improvements within the manuscript.</p>
    </ack>
    <notes>
      <title>Data Availability</title>
      <p>The datasets generated or analyzed during this study are not publicly available due to the data being held in the Imperial Secure Data Environment (SDE) and therefore being subject to information governance and data protection restrictions. They may be made available from the corresponding author on reasonable request.</p>
    </notes>
    <notes>
      <title>Funding</title>
      <p>This research was undertaken within the Imperial Secure Data Environment (SDE) and used the Imperial Clinical Analytics, Research and Evaluation (iCARE) team and data resources. iCARE receives funding from the National Institute for Health and Care Research (NIHR) Imperial Biomedical Research Centre (NIHR203323), based at Imperial College Healthcare NHS Trust and Imperial College London. The views expressed are those of the authors and not necessarily those of the National Health Service, the National Institute for Health and Care Research, or the Department of Health and Social Care.</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>CC, KG, and MA had full access to all the data in the study and take responsibility for the integrity of the data and the accuracy of the data analysis. CC, BG, TO, RL, and EM contributed to concept and design. CC, BA, IK, HLC, AR, and AMR contributed to acquisition of data. CC, KG, and MA contributed to analysis and interpretation of the data. CC, KG, MA, AMR, and EM contributed to drafting the manuscript. All authors contributed to critical revision of the manuscript for important intellectual content. YSA contributed to administrative, technical, or material support. BG, TO, and EM contributed to supervision.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dawe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cronshaw</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Frerk</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Learning from the multidisciplinary team: advancing patient care through collaboration</article-title>
          <source>Br J Hosp Med (Lond)</source>
          <year>2024</year>
          <month>05</month>
          <day>30</day>
          <volume>85</volume>
          <issue>5</issue>
          <fpage>1</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.imrpress.com/journal/BJHM/85/5/10.12968/hmed.2023.0387"/>
          </comment>
          <pub-id pub-id-type="doi">10.12968/hmed.2023.0387</pub-id>
          <pub-id pub-id-type="medline">38815972</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>IK</given-names>
            </name>
            <name name-style="western">
              <surname>Tung</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Seet</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yow</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Teo</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Chua</surname>
              <given-names>CE</given-names>
            </name>
          </person-group>
          <article-title>How to write a good discharge summary: a primer for junior physicians</article-title>
          <source>Postgrad Med J</source>
          <year>2025</year>
          <month>07</month>
          <day>22</day>
          <volume>101</volume>
          <issue>1198</issue>
          <fpage>764</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/pmj/article-lookup/doi/10.1093/postmj/qgaf020"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/postmj/qgaf020</pub-id>
          <pub-id pub-id-type="medline">39957465</pub-id>
          <pub-id pub-id-type="pii">8016499</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murdoch</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Detsky</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>The inevitable application of big data to health care</article-title>
          <source>JAMA</source>
          <year>2013</year>
          <month>04</month>
          <day>03</day>
          <volume>309</volume>
          <issue>13</issue>
          <fpage>1351</fpage>
          <lpage>2</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2013.393</pub-id>
          <pub-id pub-id-type="medline">23549579</pub-id>
          <pub-id pub-id-type="pii">1674245</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <article-title>A plan for digital health and social care</article-title>
          <source>Department of Health and Social Care United Kingdom Government</source>
          <year>2022</year>
          <month>06</month>
          <day>29</day>
          <access-date>2025-05-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.gov.uk/government/publications/a-plan-for-digital-health-and-social-care/a-plan-for-digital-health-and-social-care">https://www.gov.uk/government/publications/a-plan-for-digital-health-and-social-care/a-plan-for-digital-health-and-social-care</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Generative AI in healthcare: an implementation science informed translational path on application, integration and governance</article-title>
          <source>Implement Sci</source>
          <year>2024</year>
          <month>03</month>
          <day>15</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>27</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://implementationscience.biomedcentral.com/articles/10.1186/s13012-024-01357-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13012-024-01357-9</pub-id>
          <pub-id pub-id-type="medline">38491544</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13012-024-01357-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC10941464</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>arXiv. Preprint posted online on June 12, 2017</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.1706.03762</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Narasimhan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salimans</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Improving language understanding by generative pre-training</article-title>
          <source>OpenAI</source>
          <year>2018</year>
          <access-date>2026-06-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf">https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <article-title>Introducing GPT-4.1 in the API</article-title>
          <source>OpenAI</source>
          <year>2025</year>
          <month>04</month>
          <day>14</day>
          <access-date>2025-05-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/index/gpt-4-1/">https://openai.com/index/gpt-4-1/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A comparative study of recent large language models on generating hospital discharge summaries for lung cancer patients</article-title>
          <source>J Biomed Inform</source>
          <year>2025</year>
          <month>08</month>
          <volume>168</volume>
          <fpage>104867</fpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2025.104867</pub-id>
          <pub-id pub-id-type="medline">40544901</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(25)00096-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Small</surname>
              <given-names>WR</given-names>
            </name>
            <name name-style="western">
              <surname>Austrian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>O'Donnell</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Burk-Rafel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hochman</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zaretsky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Major</surname>
              <given-names>VJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Henke</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Verplanke</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Osso</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Saxena</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mednick</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Simonis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kesari</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Heery</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Desel</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Baskharoun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Figman</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Farooq</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jahan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Testa</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Evaluating hospital course summarization by an electronic health record-based large language model</article-title>
          <source>JAMA Netw Open</source>
          <year>2025</year>
          <month>08</month>
          <day>01</day>
          <volume>8</volume>
          <issue>8</issue>
          <fpage>e2526339</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2025.26339"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2025.26339</pub-id>
          <pub-id pub-id-type="medline">40802185</pub-id>
          <pub-id pub-id-type="pii">2837483</pub-id>
          <pub-id pub-id-type="pmcid">PMC12351420</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Bains</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Miao</surname>
              <given-names>BY</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kornblith</surname>
              <given-names>AE</given-names>
            </name>
          </person-group>
          <article-title>Evaluating large language models for drafting emergency department encounter summaries</article-title>
          <source>PLOS Digit Health</source>
          <year>2025</year>
          <month>06</month>
          <day>17</day>
          <volume>4</volume>
          <issue>6</issue>
          <fpage>e0000899</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pdig.0000899"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000899</pub-id>
          <pub-id pub-id-type="medline">40526634</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-24-00460</pub-id>
          <pub-id pub-id-type="pmcid">PMC12173386</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>YB</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>Patient-friendly discharge summaries in Korea based on ChatGPT: software development and validation</article-title>
          <source>J Korean Med Sci</source>
          <year>2024</year>
          <month>04</month>
          <day>29</day>
          <volume>39</volume>
          <issue>16</issue>
          <fpage>e148</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jkms.org/DOIx.php?id=10.3346/jkms.2024.39.e148"/>
          </comment>
          <pub-id pub-id-type="doi">10.3346/jkms.2024.39.e148</pub-id>
          <pub-id pub-id-type="medline">38685890</pub-id>
          <pub-id pub-id-type="pii">39.e148</pub-id>
          <pub-id pub-id-type="pmcid">PMC11058343</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Apolinario</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Askin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Barish</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deardorff</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Donthi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ganeshan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Kantor</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Manchanda</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Muniyappa</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>PP</given-names>
            </name>
            <name name-style="western">
              <surname>Santhosh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Torres</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yukawa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hubbard</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Rosner</surname>
              <given-names>BI</given-names>
            </name>
          </person-group>
          <article-title>Physician- and large language model-generated hospital discharge summaries</article-title>
          <source>JAMA Intern Med</source>
          <year>2025</year>
          <month>07</month>
          <day>01</day>
          <volume>185</volume>
          <issue>7</issue>
          <fpage>818</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.1001/jamainternmed.2025.0821</pub-id>
          <pub-id pub-id-type="medline">40323616</pub-id>
          <pub-id pub-id-type="pii">2833228</pub-id>
          <pub-id pub-id-type="pmcid">PMC12053800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ellershaw</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tomlinson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burton</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Frost</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hanrahan</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>DZ</given-names>
            </name>
            <name name-style="western">
              <surname>Horsfall</surname>
              <given-names>HL</given-names>
            </name>
            <name name-style="western">
              <surname>Little</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Malgapo</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Starup-Hansen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vella-Baldacchino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Noor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Automated generation of hospital discharge summaries using clinical guidelines and large language models</article-title>
          <source>Proceedings of the AAAI 2024 Spring Symposium on Clinical Foundation Models</source>
          <year>2024</year>
          <conf-name>AAAI Spring Symposium 2024</conf-name>
          <conf-date>March 25-27, 2024</conf-date>
          <conf-loc>Stanford, CA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/forum?id=1kDJJPppRG"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sánchez-Rosenberg</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Magnéli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Barle</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kontakis</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Wittauer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brodén</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT-4 generates orthopedic discharge documents faster than humans maintaining comparable quality: a pilot study of 6 cases</article-title>
          <source>Acta Orthop</source>
          <year>2024</year>
          <month>03</month>
          <day>21</day>
          <volume>95</volume>
          <fpage>152</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.2340/17453674.2024.40182"/>
          </comment>
          <pub-id pub-id-type="doi">10.2340/17453674.2024.40182</pub-id>
          <pub-id pub-id-type="medline">38597205</pub-id>
          <pub-id pub-id-type="pmcid">PMC10959013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gimeno</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krause</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>D'Souza</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Walsh</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>Completeness and readability of GPT-4-generated multilingual discharge instructions in the pediatric emergency department</article-title>
          <source>JAMIA Open</source>
          <year>2024</year>
          <month>07</month>
          <day>01</day>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>ooae050</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/jamiaopen/article-lookup/doi/10.1093/jamiaopen/ooae050"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooae050</pub-id>
          <pub-id pub-id-type="medline">38957592</pub-id>
          <pub-id pub-id-type="pii">ooae050</pub-id>
          <pub-id pub-id-type="pmcid">PMC11216721</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Dobbs</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Hutchings</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Whitaker</surname>
              <given-names>IS</given-names>
            </name>
          </person-group>
          <article-title>Using ChatGPT to write patient clinic letters</article-title>
          <source>Lancet Digit Health</source>
          <year>2023</year>
          <month>04</month>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>e179</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1016/s2589-7500(23)00048-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwieger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Angst</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>de Bardeci</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Burrer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cathomas</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrea</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Grätz</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Knorr</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kronenberg</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Spiller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Troi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Seifritz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Weber</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Olbrich</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Large language models can support generation of standardized discharge summaries - a retrospective study utilizing ChatGPT-4 and electronic health records</article-title>
          <source>Int J Med Inform</source>
          <year>2024</year>
          <month>12</month>
          <volume>192</volume>
          <fpage>105654</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(24)00317-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2024.105654</pub-id>
          <pub-id pub-id-type="medline">39437512</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(24)00317-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meskó</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Prompt engineering as an important emerging skill for medical professionals: tutorial</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>10</month>
          <day>04</day>
          <volume>25</volume>
          <fpage>e50638</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e50638/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/50638</pub-id>
          <pub-id pub-id-type="medline">37792434</pub-id>
          <pub-id pub-id-type="pii">v25i1e50638</pub-id>
          <pub-id pub-id-type="pmcid">PMC10585440</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marvin</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hellen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jjingo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nakatumba-Nabende</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Jacob</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Piramuthu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Falkowski-Gilski</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Prompt engineering in large language models</article-title>
          <source>Data Intelligence and Cognitive Informatics</source>
          <year>2023</year>
          <publisher-loc>Singapore, Singapore</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mohati</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nayebi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hemmati</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Prompt engineering or fine-tuning: an empirical assessment of LLMs for code</article-title>
          <source>arXiv. Preprint posted online on October 11, 2023</source>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2310.10508"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/msr66628.2025.00082</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>White</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hays</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sandborn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Olea</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Elnashar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Spencer-Smith</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <article-title>A prompt pattern catalog to enhance prompt engineering with ChatGPT</article-title>
          <source>arXiv. Preprint posted online on February 21, 2023</source>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2023arXiv230211382W"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2302.11382</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ganzinger</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kunz</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Fuchs</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Loos</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dugas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pausch</surname>
              <given-names>TM</given-names>
            </name>
          </person-group>
          <article-title>Automated generation of discharge summaries: leveraging large language models with clinical data</article-title>
          <source>Sci Rep</source>
          <year>2025</year>
          <month>05</month>
          <day>12</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>16466</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-025-01618-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-025-01618-7</pub-id>
          <pub-id pub-id-type="medline">40355506</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-025-01618-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC12069548</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ando</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Okumura</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Komachi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Horiguchi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Matsumoto</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Is artificial intelligence capable of generating hospital discharge summaries from inpatient records?</article-title>
          <source>PLOS Digit Health</source>
          <year>2022</year>
          <month>12</month>
          <day>12</day>
          <volume>1</volume>
          <issue>12</issue>
          <fpage>e0000158</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pdig.0000158"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000158</pub-id>
          <pub-id pub-id-type="medline">36812600</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-22-00138</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931331</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>Large language model assistant for emergency department discharge documentation</article-title>
          <source>JAMA Netw Open</source>
          <year>2025</year>
          <month>10</month>
          <day>01</day>
          <volume>8</volume>
          <issue>10</issue>
          <fpage>e2538427</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2025.38427"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2025.38427</pub-id>
          <pub-id pub-id-type="medline">41118162</pub-id>
          <pub-id pub-id-type="pii">2840377</pub-id>
          <pub-id pub-id-type="pmcid">PMC12541540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <article-title>Transfer of care - acute inpatient discharge standard</article-title>
          <source>National Health Service England</source>
          <year>2022</year>
          <access-date>2026-06-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://standards.nhs.uk/published-standards/transfer-of-care-acute-inpatient-discharge-standard">https://standards.nhs.uk/published-standards/transfer-of-care-acute-inpatient-discharge-standard</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <article-title>DAPB4042: transfer of care – acute inpatient discharge standard</article-title>
          <source>National Health Service England</source>
          <access-date>2025-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://digital.nhs.uk/data-and-information/information-standards/governance/latest-activity/standards-and-collections/dapb4042-transfer-of-care-acute-inpatient-discharge">https://digital.nhs.uk/data-and-information/information-standards/governance/latest-activity/standards-and-collections/dapb4042-transfer-of-care-acute-inpatient-discharge</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>iCARE</article-title>
          <source>Imperial College London</source>
          <access-date>2025-07-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.imperial.ac.uk/medicine/research-and-impact/groups/icare/">https://www.imperial.ac.uk/medicine/research-and-impact/groups/icare/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Bajracharya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sills</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kapoor</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Berlowitz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pradhan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Generation of patient after-visit summaries to support physicians</article-title>
          <source>Proceedings of the 29th International Conference on Computational Linguistics</source>
          <year>2022</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>International Committee on Computational Linguistics</publisher-name>
          <fpage>6234</fpage>
          <lpage>47</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <article-title>Unified Medical Language System (UMLS)</article-title>
          <source>National Institutes of Health, National Library of Medicine</source>
          <access-date>2025-07-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nlm.nih.gov/research/umls/index.html">https://www.nlm.nih.gov/research/umls/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hartman</surname>
              <given-names>VC</given-names>
            </name>
            <name name-style="western">
              <surname>Bapat</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Weiner</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Navi</surname>
              <given-names>BB</given-names>
            </name>
            <name name-style="western">
              <surname>Sholle</surname>
              <given-names>ET</given-names>
            </name>
            <name name-style="western">
              <surname>Campion</surname>
              <given-names>TR Jr</given-names>
            </name>
          </person-group>
          <article-title>A method to automate the discharge summary hospital course for neurology patients</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2023</year>
          <month>11</month>
          <day>17</day>
          <volume>30</volume>
          <issue>12</issue>
          <fpage>1995</fpage>
          <lpage>2003</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocad177</pub-id>
          <pub-id pub-id-type="medline">37639624</pub-id>
          <pub-id pub-id-type="pii">7252876</pub-id>
          <pub-id pub-id-type="pmcid">PMC10654848</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CY</given-names>
            </name>
          </person-group>
          <article-title>ROUGE: a package for automatic evaluation of summaries</article-title>
          <source>Text Summarization Branches Out</source>
          <year>2004</year>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>74</fpage>
          <lpage>81</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kishore</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberger</surname>
              <given-names>KQ</given-names>
            </name>
            <name name-style="western">
              <surname>Artzi</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>BERTScore: evaluating text generation with BERT</article-title>
          <source>arXiv. Preprint posted online on April 21, 2019</source>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.48550/arXiv.1904.09675"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1904.09675</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Challener</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>O'Horo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nyman</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Flesch-Kincaid Grade Level readability scores to evaluate readability of clinical documentation during an electronic health record transition</article-title>
          <source>Adv Health Inf Sci Pract</source>
          <year>2025</year>
          <month>06</month>
          <day>18</day>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>VBWY7913</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://AHISP.kglmeridian.com/doi/10.63116/VBWY7913"/>
          </comment>
          <pub-id pub-id-type="doi">10.63116/VBWY7913</pub-id>
          <pub-id pub-id-type="medline">40979915</pub-id>
          <pub-id pub-id-type="pii">137463</pub-id>
          <pub-id pub-id-type="pmcid">PMC12366713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Omiye</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Lester</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Spichak</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rotemberg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Daneshjou</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Large language models propagate race-based medicine</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <month>10</month>
          <day>20</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>195</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-023-00939-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00939-z</pub-id>
          <pub-id pub-id-type="medline">37864012</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00939-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC10589311</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ullah</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Parwani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Baig</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Challenges and barriers of using large language models (LLM) such as ChatGPT for diagnostic medicine with a focus on digital pathology - a recent scoping review</article-title>
          <source>Diagn Pathol</source>
          <year>2024</year>
          <month>02</month>
          <day>27</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>43</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://diagnosticpathology.biomedcentral.com/articles/10.1186/s13000-024-01464-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13000-024-01464-7</pub-id>
          <pub-id pub-id-type="medline">38414074</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13000-024-01464-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC10898121</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Albanie</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Needle threading: can LLMs follow threads through near-million-scale haystacks?</article-title>
          <source>arXiv. Preprint posted online on November 7, 2024</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.2411.05000</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gero</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Galley</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Self-verification improves few-shot clinical information extraction</article-title>
          <source>arXiv. Preprint posted online on May 30, 2023</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.2306.00024</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
