<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v5i1e77149</article-id><article-id pub-id-type="doi">10.2196/77149</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Leveraging Large Language Models to Improve the Readability of German Online Medical Texts: Evaluation Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Miftaroski</surname><given-names>Amela</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zowalla</surname><given-names>Richard</given-names></name><degrees>Dr sc hum</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wiesner</surname><given-names>Martin</given-names></name><degrees>Dipl Inform med</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author" corresp="yes"><name 
name-style="western"><surname>Pobiruchin</surname><given-names>Monika</given-names></name><degrees>Dr sc hum</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Faculty of Informatics, Heilbronn University</institution><addr-line>Max-Planck-Str. 39</addr-line><addr-line>Heilbronn</addr-line><country>Germany</country></aff><aff id="aff2"><institution>Research and Innovation Center for Cognitive Service Systems (KODIS), Fraunhofer Institute for Industrial Engineering</institution><addr-line>Stuttgart</addr-line><country>Germany</country></aff><aff id="aff3"><institution>Consumer Health Informatics Special Interest Group, German Association for Medical Informatics, Biometry and Epidemiology (GMDS) e.V.</institution><addr-line>Cologne</addr-line><country>Germany</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Liu</surname><given-names>Hongfang</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Robison</surname><given-names>Barrie</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Shah</surname><given-names>Namra Bhadreshkumar</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Monika Pobiruchin, Dr sc hum, Faculty of Informatics, Heilbronn University, Max-Planck-Str. 
39, Heilbronn, 74081, Germany, 49 7131 504 ext 633; <email>monika.pobiruchin@hs-heilbronn.de</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>23</day><month>1</month><year>2026</year></pub-date><volume>5</volume><elocation-id>e77149</elocation-id><history><date date-type="received"><day>08</day><month>05</month><year>2025</year></date><date date-type="rev-recd"><day>30</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>26</day><month>11</month><year>2025</year></date></history><copyright-statement>&#x00A9; Amela Miftaroski, Richard Zowalla, Martin Wiesner, Monika Pobiruchin. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 23.1.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2026/1/e77149"/><abstract><sec><title>Background</title><p>Patient education materials (PEMs) found online are often written at a complexity level too high for the average reader, which can hinder understanding and informed decision-making. 
Large language models (LLMs) may offer a solution by simplifying complex medical texts. To date, little is known about how well LLMs can handle simplification tasks for German-language PEMs.</p></sec><sec><title>Objective</title><p>The study aims to investigate whether LLMs can increase the readability of German online medical texts to a recommended level.</p></sec><sec sec-type="methods"><title>Methods</title><p>A sample of 60 German texts originating from online medical resources was compiled. To improve the readability of these texts, four LLMs were selected and used for text simplification: ChatGPT-3.5, ChatGPT-4o, Microsoft Copilot, and Le Chat. Next, readability scores (Flesch reading ease [FRE] and Wiener Sachtextformel [4th Vienna Formula; WSTF]) of the original texts were computed and compared to the rephrased LLM versions. A Student <italic>t</italic> test for paired samples was used to test the reduction of readability scores, ideally to or lower than the eighth grade level.</p></sec><sec sec-type="results"><title>Results</title><p>Most of the original texts were rated as <italic>difficult</italic> to <italic>quite difficult</italic> (average WSTF 11.24, SD 1.29; FRE 35.92, SD 7.64). The LLMs achieved the following average scores: ChatGPT-3.5 (WSTF 9.96, SD 1.52; FRE 45.04, SD 8.62), ChatGPT-4o (WSTF 10.6, SD 1.37; FRE 39.23, SD 7.45), Microsoft Copilot (WSTF 8.99, SD 1.10; FRE 49.0, SD 6.51), and Le Chat (WSTF 11.71, SD 1.47; FRE 33.72, SD 8.58). ChatGPT-3.5, ChatGPT-4o, and Microsoft Copilot showed a statistically significant improvement in readability. However, the <italic>t</italic> tests yielded no statistically significant results for the reduction of scores lower than the eighth grade level.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>LLMs can improve the readability of PEMs in German. This moderate improvement can support patients reading PEMs online. 
LLMs demonstrated their potential to make complex online medical text more accessible to a broader audience by increasing readability. This is the first study to evaluate this for German online medical texts.</p></sec></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>health information</kwd><kwd>large language models</kwd><kwd>patient education material</kwd><kwd>readability</kwd><kwd>AI</kwd><kwd>LLM</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Overview</title><p>In the digital era, health information is widely available [<xref ref-type="bibr" rid="ref1">1</xref>] and exists in many different forms, for example, Wikipedia articles, health-related websites, leaflets, and brochures [<xref ref-type="bibr" rid="ref2">2</xref>], categorized as patient education materials (PEMs). Such materials support medical laypersons in learning about diseases, diagnoses, therapies, etc [<xref ref-type="bibr" rid="ref3">3</xref>]. Health information should be easy to understand for the general population and should promote health literacy [<xref ref-type="bibr" rid="ref4">4</xref>]. In this context, the COVID-19 pandemic confirmed the need to improve the general scientific and health literacy [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>However, Zowalla et al [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], Rooney et al [<xref ref-type="bibr" rid="ref9">9</xref>], Yeung et al [<xref ref-type="bibr" rid="ref5">5</xref>], Gordejeva et al [<xref ref-type="bibr" rid="ref10">10</xref>], and others have shown that the readability of health information is often unsatisfactory regardless of its source (online material, booklets), authors (official bodies and institutions, individuals), or language. 
Many of these resources pose a challenge due to their sentence complexity and use of expert language, making it difficult for laypersons to effectively read and understand such material.</p><p>Artificial intelligence (AI) offers potential for substantial improvements in health care applications and has become omnipresent in recent years [<xref ref-type="bibr" rid="ref11">11</xref>]. In particular, large language models (LLMs) represent promising opportunities [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. In this context, LLMs can be leveraged to improve the readability of existing PEMs intended for citizens.</p><p>Because LLMs are easily accessible for everyone [<xref ref-type="bibr" rid="ref14">14</xref>], citizens and patients alike will use them to seek health information online, answers to specific questions, or even therapy advice, similar to Internet search engines [<xref ref-type="bibr" rid="ref15">15</xref>].</p><p>For these reasons, citizens will use LLMs to <italic>translate</italic> complex medical terminology and to simplify text material, aiming for an improved comprehensibility [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Moreover, an increased integration of AI in the curation of health information offers several benefits [<xref ref-type="bibr" rid="ref18">18</xref>] for institutions, primarily time and cost savings.</p></sec><sec id="s1-2"><title>Prior Work</title><p>There is a decade-long research tradition about how to use and implement decision support systems, machine learning, and AI solutions in health care. 
Since 2023, with the widespread availability of LLMs [<xref ref-type="bibr" rid="ref19">19</xref>], those technologies have been explored for beneficial health care use cases [<xref ref-type="bibr" rid="ref15">15</xref>] in several medical domains [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref28">28</xref>].</p><p>Researchers investigated how publicly available LLMs interfere with patients&#x2019; information seeking behavior. Eng et al [<xref ref-type="bibr" rid="ref29">29</xref>] entered questions about rotator cuff repair surgery in ChatGPT-3.5 and let 2 orthopedic surgeons analyze the answers. The questions were derived from frequently asked questions (FAQs) sites from various patient information websites. The answers by the LLM were evaluated in terms of readability (Flesch-Kincaid grade level); the <italic>Journal of the American Medical Association</italic> Benchmark criteria and the DISCERN score [<xref ref-type="bibr" rid="ref30">30</xref>] were also used to evaluate reliability and the quality of health information on the internet. The average readability level of the generated answers was equal to a college freshman (Flesch-Kincaid grade of 13.4).</p><p>Similar work was conducted by Mika et al [<xref ref-type="bibr" rid="ref31">31</xref>] who fed ChatGPT with &#x201C;ten frequently asked questions regarding total hip arthroplasty.&#x201D; They found that the generated answers were fairly accurate and would be easily understood by patients. Another commonly used readability metric is the Flesch reading ease (FRE) score, which ranges from 0 to 100; lower values indicate a low level of readability, that is, it is difficult to read the text.</p><p>Li et al [<xref ref-type="bibr" rid="ref32">32</xref>] let ChatGPT process 400 English radiology reports (the mean length of words was 164, SD 117). 
The FRE improved from &#x201C;38.0&#x00B1;11.8&#x201D; to &#x201C;83.5&#x00B1;5.6&#x201D;.</p><p>Similar results were reported by Moons et al [<xref ref-type="bibr" rid="ref33">33</xref>] who used ChatGPT and Google Bard to reformulate 3 &#x201C;selected patient information sections from scientific journals.&#x201D; Google Bard was able to reduce the reading level of the texts to that of sixth graders. However, this was achieved by omission of &#x201C;significant information&#x201D; [<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>In an analysis of PEM for bariatric surgery, ChatGPT (version 3.5 and 4) and Google Bard were able to improve the readability levels of 66 frequently asked questions pages of US-based hospitals. The mean FRE scores of the original texts were &#x201C;48.1 (SD 19.0), which corresponded to &#x2018;difficult to read,&#x2019; while initial responses from GPT-3.5, GPT-4.0 and Bard achieved mean scores of 31.4 (SD 11.4), 42.7 (SD 9.7), and 56.3 (SD 11.6), which corresponded to &#x2018;difficult to read,&#x2019; &#x2018;difficult to read,&#x2019; and &#x2018;fairly difficult to read,&#x2019; respectively&#x201D; [<xref ref-type="bibr" rid="ref34">34</xref>]. The authors also evaluated the accuracy of the simplified texts. The majority of the LLM responses were equal in accuracy to the original texts, but quality varies among LLMs. Srinivasan et al [<xref ref-type="bibr" rid="ref34">34</xref>] stress the &#x201C;importance of evaluating both the readability and quality&#x201D; of rephrased texts for patient information.</p><p>This is also in line with the conclusion by Pal et al [<xref ref-type="bibr" rid="ref35">35</xref>], who recommend training more specialized LLMs for tasks in the medical domain. 
They propose that this will add credit and reliability to the answers produced by LLMs in the clinical setting.</p><p>Focusing on non-English evaluations, some research was published for expert-centric scenarios: a multilingual benchmark set for answering medical exam questions was developed by the &#x201C;MedExpQA&#x201D; study [<xref ref-type="bibr" rid="ref36">36</xref>]. This contains standardized answers from health experts. To assess the accuracy of medical questions, the study analyzed LLMs with and without retrieval-augmented generation methods. It was found that the models in French, Italian, and Spanish were inferior to those in English. In addition, difficulties such as the tendency to generate incorrect answers and the use of outdated information were identified.</p><p>Heilmeyer et al [<xref ref-type="bibr" rid="ref18">18</xref>] focused on German medical text: they &#x201C;assessed the feasibility of using nonproprietary LLMs of the GPT variety as writing assistance for medical professionals.&#x201D; Pretrained LLMs were trained on electronic health records of more than 82,000 patient encounters in a German outpatient clinic. AI tools proved to be &#x201C;helpful writing assistance&#x201D; for medical experts and patient reports.</p><p>As of today, no readability evaluation has been conducted for LLM-rephrased German health texts from the citizens&#x2019; perspective. By <italic>citizens&#x2019; perspective</italic>, this study refers to evaluating LLM-rephrased health texts as they would have been experienced by an average layperson without specialized knowledge or expertise in prompt engineering. 
This approach reflected the realistic scenario of laypersons seeking health information online, using freely accessible tools without systematically optimizing prompts or using application programming interfaces (APIs) to tune LLM model parameters.</p></sec><sec id="s1-3"><title>Aims of the Study</title><p>The aim of the study is to investigate, from a layperson&#x2019;s perspective, whether LLMs can simplify and increase the readability of German online medical texts to a recommended level of readability, that is, the eighth grade [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>].</p><p>In this context, 3 specific aims were defined as:</p><list list-type="order"><list-item><p>Rephrase German medical texts with LLMs,</p></list-item><list-item><p>Compute their readability, and</p></list-item><list-item><p>Evaluate if the AI-rephrased texts showed a higher level of readability.</p></list-item></list></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Medical Text Corpus</title><p>Previous research and a prior sample size calculation (see <italic>Statistical Analysis</italic>) indicated that the desired reduction in Wiener Sachtextformel (4th Vienna Formula) (WSTF) score to meet the recommended grade level could be shown with a rather small sample (n&#x003C;10). Therefore, a medical text corpus was compiled to represent high prevalence diseases, for example, cancer and diabetes mellitus, and major public health topics, for example, vaccinations, as well as national bodies and institutions such as the national health portals of Germany and Austria or popular online health websites.</p><p>For a full list of included content providers and websites, see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>First, all 60 websites were visited with a Chrome web browser. Next, the corresponding texts were manually extracted without any HTML-related markup. 
The 60 plain text files of the resulting corpus were then used for further processing.</p></sec><sec id="s2-2"><title>Readability Analysis</title><p>The term readability &#x201C;refers to the properties of written text [&#x2026;] it reflects the (1) complexity of a text&#x2019;s structure, (2) sentence structure, and (3) chosen vocabulary&#x201D; [<xref ref-type="bibr" rid="ref10">10</xref>]. For the German language, 2 well-known readability metrics are established: (1) an adapted version of the (original English) FRE [<xref ref-type="bibr" rid="ref39">39</xref>] for the German language by Amstad [<xref ref-type="bibr" rid="ref40">40</xref>] and (2) the WSTF by Bamberger and Vanecek [<xref ref-type="bibr" rid="ref41">41</xref>]. Both metrics use text parameters like average sentence length and average number of syllables per word.</p><p>The FRE score ranges from 0 to 100; lower values indicate a low level of readability, that is, it is difficult to read the text. The WSTF metric ranges from 4 to 15, which roughly corresponds to school grades. For instance, if a text scores a value of 10, at least 10 years in school are necessary for readers to understand this text.</p><p>For the readability computations of all texts and to address research aim (2), the analysis framework by Wiesner et al [<xref ref-type="bibr" rid="ref42">42</xref>] was used. The analysis was conducted on a Windows 10 64-bit computer with Java Runtime Environment (version 18; Oracle Corporation).</p></sec><sec id="s2-3"><title>Selection of Large Language Models</title><p>A scoping review of well-known code platforms such as GitHub [<xref ref-type="bibr" rid="ref43">43</xref>] or Hugging Face [<xref ref-type="bibr" rid="ref44">44</xref>] was conducted to search for available LLMs. 
In addition, online literature databases such as the Association of Computing Machinery Digital Library and Institute of Electrical and Electronics Engineers Explore were searched to scan publications that already used LLMs (see <xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Overview of various large language models available as of April 2024.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Name</td><td align="left" valign="bottom">Year</td><td align="left" valign="bottom">Domain</td><td align="left" valign="bottom">Developer</td><td align="left" valign="bottom">Availability</td><td align="left" valign="bottom">Open source</td><td align="left" valign="bottom">Free to use</td><td align="left" valign="bottom">Language</td></tr></thead><tbody><tr><td align="left" valign="top">ChatGPT 3.5 [<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">2022</td><td align="left" valign="top">General</td><td align="left" valign="top">OpenAI</td><td align="left" valign="top">Web</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">GPT4 [<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">2023</td><td align="left" valign="top">General</td><td align="left" valign="top">OpenAI</td><td align="left" valign="top">Web</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">Google Gemini [<xref ref-type="bibr" rid="ref46">46</xref>]</td><td align="left" valign="top">2023</td><td align="left" valign="top">General</td><td align="left" valign="top">Google</td><td align="left" valign="top">Web</td><td align="left" valign="top">No</td><td align="left" 
valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">BERT [<xref ref-type="bibr" rid="ref47">47</xref>]</td><td align="left" valign="top">2018</td><td align="left" valign="top">General</td><td align="left" valign="top">Google</td><td align="left" valign="top">Local</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">Llama 2 [<xref ref-type="bibr" rid="ref48">48</xref>]</td><td align="left" valign="top">2023</td><td align="left" valign="top">General</td><td align="left" valign="top">Meta</td><td align="left" valign="top">Local</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">Claude 2 [<xref ref-type="bibr" rid="ref49">49</xref>]</td><td align="left" valign="top">2023</td><td align="left" valign="top">General</td><td align="left" valign="top">Anthropic</td><td align="left" valign="top">Web and Local</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">T5 [<xref ref-type="bibr" rid="ref50">50</xref>]</td><td align="left" valign="top">2019</td><td align="left" valign="top">General</td><td align="left" valign="top">Google</td><td align="left" valign="top">Local</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">BLOOM [<xref ref-type="bibr" rid="ref51">51</xref>]</td><td align="left" valign="top">2022</td><td align="left" valign="top">General</td><td align="left" valign="top">Big Science</td><td align="left" valign="top">Local</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">Microsoft Copilot [<xref 
ref-type="bibr" rid="ref52">52</xref>]</td><td align="left" valign="top">2021</td><td align="left" valign="top">General</td><td align="left" valign="top">Microsoft</td><td align="left" valign="top">Web</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">Falcon LLM [<xref ref-type="bibr" rid="ref53">53</xref>]</td><td align="left" valign="top">2023</td><td align="left" valign="top">General</td><td align="left" valign="top">Technology Innovation Institute</td><td align="left" valign="top">Local</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">Le Chat [<xref ref-type="bibr" rid="ref54">54</xref>]</td><td align="left" valign="top">2024</td><td align="left" valign="top">General</td><td align="left" valign="top">Mistral AI</td><td align="left" valign="top">Web</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">Pho&#x0308;nix [<xref ref-type="bibr" rid="ref55">55</xref>]</td><td align="left" valign="top">2023</td><td align="left" valign="top">General</td><td align="left" valign="top">Matthias Uhlig</td><td align="left" valign="top">Local</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">GER<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr><tr><td align="left" valign="top">LeoLM 7b/13b [<xref ref-type="bibr" rid="ref56">56</xref>]</td><td align="left" valign="top">2023</td><td align="left" valign="top">General</td><td align="left" valign="top">LAION and HessianAI</td><td align="left" valign="top">Web and Local</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">GER</td></tr><tr><td align="left" valign="top">MedAlpaca [<xref 
ref-type="bibr" rid="ref57">57</xref>]</td><td align="left" valign="top">2023</td><td align="left" valign="top">Medical</td><td align="left" valign="top">Tianyu Han et al</td><td align="left" valign="top">Local</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr><tr><td align="left" valign="top">BioMedLM [<xref ref-type="bibr" rid="ref58">58</xref>]</td><td align="left" valign="top">2024</td><td align="left" valign="top">Biomedical</td><td align="left" valign="top">Stanford CRFM</td><td align="left" valign="top">Local</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">EN</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>EN: English.</p></fn><fn id="table1fn2"><p><sup>b</sup>GER: German.</p></fn></table-wrap-foot></table-wrap><p>Some important aspects and criteria influenced the final selection: The language of the LLM&#x2014;preferably a German-trained model should be used&#x2014;as well as the specific field or domain of the LLM (general or medical domain).</p><p>Some LLMs can only be executed locally, while some can be used via a web front end. The latter would be preferable because in our use case, LLMs should be used by laypeople, who do not have the hardware capabilities at their homes nor the technical knowledge to host and operate LLMs. Preferably, the use of the LLM should be free of charge and open to use, that is, not in a beta test phase or only available for persons with a technical background (ie, programming skills).</p><p>Of 15 candidate LLMs, only 3 met the previously outlined criteria and were selected: (1) ChatGPT 3.5, (2) Microsoft Copilot, and (3) Le Chat. In May 2024 (after the LLM scoping review phase), OpenAI presented and launched their new release: GPT-4o. The authors decided to include this model as well. 
LeoLM (13b) was initially considered but later excluded due to frequent inaccuracies, very short or context-inadequate outputs, occasional language mismatches (answer in English instead of German), and overall unreliability in handling the text material.</p></sec><sec id="s2-4"><title>Accuracy of Rephrased Health Information Texts</title><p>The answers generated by each LLM were independently assessed by 3 reviewers (AM, RZ, MP) with a background in medical informatics. Assessments focused on the medical accuracy, clarity, and plausibility of the information provided, ensuring that each response was evaluated not only for linguistic quality but also for its alignment with established medical knowledge and standards. Interrater agreement was measured by calculating Fleiss &#x03BA; and percent agreement.</p></sec><sec id="s2-5"><title>Prompt Engineering</title><p>Prompt engineering refers to the process of designing and optimizing the input prompts for any LLM. The structure and content of a prompt can greatly influence the quality of the output generated by the LLM. Today, some techniques have evolved to obtain better results by LLMs:</p><list list-type="bullet"><list-item><p><italic>Few-shot prompting</italic> provides examples within a prompt to clarify instructions [<xref ref-type="bibr" rid="ref59">59</xref>]. This approach improves the model&#x2019;s ability to interpret and respond accurately to the task, as the examples provided serve to establish patterns and context. The term &#x2018;few&#x2019; indicates that a limited number of examples are provided as opposed to zero-shot prompting, where no examples are given.</p></list-item><list-item><p><italic>Chain of thought prompting</italic> breaks down complex tasks into steps within a prompt [<xref ref-type="bibr" rid="ref60">60</xref>]. 
This approach mimics human problem solving, guiding the LLM through structured reasoning to generate more accurate responses, especially for tasks that require multiple levels of reasoning.</p></list-item><list-item><p><italic>Clue and reasoning prompting</italic> introduces a structured reasoning approach [<xref ref-type="bibr" rid="ref61">61</xref>]. Unlike the other methods, clue and reasoning prompting targets complex linguistic features (eg, irony, contrast, and intensification) and involves 2 stages: the LLM (1) identifies <italic>clues</italic> within the input (eg, keywords, tone, and references) and (2) uses these clues to perform a reasoning process. This step-by-step approach aims to fill gaps in the LLM&#x2019;s reasoning capabilities, enabling it to deal more effectively with complex linguistic variations.</p></list-item></list><p>For the average person seeking health information online, advanced prompting techniques may be too complex to apply. These techniques require understanding how to structure input for LLMs. Few-shot prompting, for instance, involves providing examples within a prompt, requiring users to explain their needs clearly for effective interpretation. Most people would find designing such prompts difficult and time-consuming, especially when simply needing help understanding the provided health information.</p><p>For this reason, the authors decided to use a zero-shot prompting approach. Therefore, an extensive role-prompt approach was evaluated with a subset of the medical text corpus (6&#x2010;12 texts) and the 3 web-based LLMs. This prompt contained context information and provided a detailed task description for the LLM. However, using this prompt, the results&#x2019; readability decreased.</p><disp-quote><p>I, a person with no specialist medical knowledge, would like to understand as simply as possible how a stroke is treated by medical staff. 
I have read a text from gesund.bund.de, which I did not understand because of the medical terminology. Your task as AI ChatBot is to rewrite the following text so that I can understand it completely at the end. Here is the text: {TEXT}</p></disp-quote><p>Iteratively, other approaches were tested, eg, English prompt versus German prompt, or prompts with specific instructions to fine-tune a given readability score. Finally, a reduced German role prompt yielded the best results:</p><disp-quote><p>A person with no medical knowledge wants to understand a text. You, as a large language model, must simplify the following text for this person using simple language without reducing the content. Here is the text: {TEXT}</p></disp-quote><p>This prompt, combined with the actual medical text, was submitted to every LLM for every text. Due to the limit of 4000 characters for Microsoft Copilot, the texts were split, and several requests were made. Eventually, a total of 240 LLM conversations were conducted between May and July 2024.</p></sec><sec id="s2-6"><title>Statistical Analysis</title><p>Readability scores for the original and rephrased texts are presented as mean and SD. A Student <italic>t</italic> test for paired samples was used to test the reduction of readability scores prior to and after the rephrasing. Prior research of German medical texts [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref42">42</xref>] yielded a mean readability of 12.46 (SD 1.84) for the WSTF. This means a reduction of 4.5 grade levels would result in the recommended reading level of 8, that is, a score &#x2264;8.0. Given these parameters, a sample size of 4 would be needed to show such an improvement with an alpha error of 0.05 (adjustment for multiple testing according to the Holm-Bonferroni method [<xref ref-type="bibr" rid="ref62">62</xref>]) and a power of 95%. 
Sample size was calculated with G*Power 3.1 [<xref ref-type="bibr" rid="ref63">63</xref>].</p><p>After the analysis of the text corpus&#x2019; readability scores, the average readability was calculated as WSTF 11.24 (SD 1.29); FRE 35.92 (SD 7.64). Thus, only a reduction of 3.5 grade levels (for WSTF) would be needed. For the FRE metric, an increase of 25 score points is needed for an eighth grade readability level, that is, a FRE score between 60 and 70.</p><p>The hypotheses were formulated as follows:</p><disp-formula id="E5"><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">W</mml:mi><mml:mi mathvariant="normal">S</mml:mi><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">F</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mo>:</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mtext>orig</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mtext>LLM</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2264;</mml:mo><mml:mn>3.5</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL1"><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">W</mml:mi><mml:mi mathvariant="normal">S</mml:mi><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">F</mml:mi></mml:mrow><mml:mrow><mml:mo 
stretchy="false">|</mml:mo></mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>:</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mtext>orig</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mtext>LLM</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x003E;</mml:mo><mml:mn>3.5</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The tests for the FRE metrics were constructed in a similar manner:</p><disp-formula id="equWL2"><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">E</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:mrow></mml:msub><mml:mo>:</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">M</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">g</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x2264;</mml:mo><mml:mn>25</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E6"><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mtext>FRE</mml:mtext><mml:mrow><mml:mo 
stretchy="false">|</mml:mo></mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>:</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mtext>LLM</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mtext>orig</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x003E;</mml:mo><mml:mn>25</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>In addition, to show if LLMs improved the readability at all, paired <italic>t</italic> tests were conducted. The tests were constructed as follows:</p><disp-formula id="equWL3"><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">W</mml:mi><mml:mi mathvariant="normal">S</mml:mi><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">F</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:mrow></mml:msub><mml:mo>:</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">g</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x2264;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">M</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E7"><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">W</mml:mi><mml:mi mathvariant="normal">S</mml:mi><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">F</mml:mi><mml:mrow><mml:mo 
stretchy="false">|</mml:mo></mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:mrow></mml:msub><mml:mo>:</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">g</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x003E;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">M</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>For the FRE metrics, the hypotheses were:</p><disp-formula id="equWL4"><mml:math id="eqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">E</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:mrow></mml:msub><mml:mo>:</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">M</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x2264;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">g</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E8"><mml:math id="eqn8"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi 
mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">E</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:mrow></mml:msub><mml:mo>:</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">L</mml:mi><mml:mi mathvariant="normal">M</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mo>&#x003E;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">g</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Readability of the Original Health Information Texts</title><p>Most of the original texts were rated as <italic>difficult</italic> to <italic>quite difficult</italic> (average WSTF score 11.24 [SD 1.29], FRE 35.92 [SD 7.64]); this corresponds to 12 years of schooling. According to the FRE, the W39 website had the most difficult text to read (WSTF 13.97, FRE 16.74), although the W54 website had the highest WSTF score (WSTF 14.28, FRE 19.45); the W7 website had the text with the best readability (WSTF 8.70, FRE 51.02). Only 2 websites achieved a grade level of 8 (W7, W9). 
<xref ref-type="table" rid="table2">Table 2</xref> presents the calculated WSTF and FRE scores for the original health information texts with their means and SD.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Computed readability scores and number of words for 60 medical information texts.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Website</td><td align="left" valign="bottom">WSTF<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup><sup>,</sup><sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="bottom">FRE<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup><sup>,</sup><sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="bottom">Number of words<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">W1</td><td align="left" valign="top">9.36</td><td align="left" valign="top">43.93</td><td align="left" valign="top">950</td></tr><tr><td align="left" valign="top">W2</td><td align="left" valign="top">10.63</td><td align="left" valign="top">41.92</td><td align="left" valign="top">1021</td></tr><tr><td align="left" valign="top">W3</td><td align="left" valign="top">10.70</td><td align="left" valign="top">44.26</td><td align="left" valign="top">1007</td></tr><tr><td align="left" valign="top">W4</td><td align="left" valign="top">9.83</td><td align="left" valign="top">41.46</td><td align="left" valign="top">784</td></tr><tr><td align="left" valign="top">W5</td><td align="left" valign="top">10.80</td><td align="left" valign="top">36.40</td><td align="left" valign="top">1909</td></tr><tr><td align="left" valign="top">W6</td><td align="left" valign="top">11.01</td><td align="left" valign="top">41.23</td><td align="left" valign="top">1131</td></tr><tr><td align="left" valign="top">W7</td><td align="left" valign="top">8.70</td><td 
align="left" valign="top">51.02</td><td align="left" valign="top">907</td></tr><tr><td align="left" valign="top">W8</td><td align="left" valign="top">10.84</td><td align="left" valign="top">34.30</td><td align="left" valign="top">1017</td></tr><tr><td align="left" valign="top">W9</td><td align="left" valign="top">8.90</td><td align="left" valign="top">47.80</td><td align="left" valign="top">1279</td></tr><tr><td align="left" valign="top">W10</td><td align="left" valign="top">10.65</td><td align="left" valign="top">38.82</td><td align="left" valign="top">1434</td></tr><tr><td align="left" valign="top">W11</td><td align="left" valign="top">10.01</td><td align="left" valign="top">43.06</td><td align="left" valign="top">898</td></tr><tr><td align="left" valign="top">W12</td><td align="left" valign="top">12.00</td><td align="left" valign="top">28.65</td><td align="left" valign="top">1214</td></tr><tr><td align="left" valign="top">W13</td><td align="left" valign="top">11.68</td><td align="left" valign="top">31.91</td><td align="left" valign="top">780</td></tr><tr><td align="left" valign="top">W14</td><td align="left" valign="top">10.77</td><td align="left" valign="top">43.18</td><td align="left" valign="top">597</td></tr><tr><td align="left" valign="top">W15</td><td align="left" valign="top">12.28</td><td align="left" valign="top">33.36</td><td align="left" valign="top">1205</td></tr><tr><td align="left" valign="top">W16</td><td align="left" valign="top">9.35</td><td align="left" valign="top">46.21</td><td align="left" valign="top">661</td></tr><tr><td align="left" valign="top">W17</td><td align="left" valign="top">10.32</td><td align="left" valign="top">41.83</td><td align="left" valign="top">780</td></tr><tr><td align="left" valign="top">W18</td><td align="left" valign="top">10.30</td><td align="left" valign="top">44.75</td><td align="left" valign="top">832</td></tr><tr><td align="left" valign="top">W19</td><td align="left" valign="top">10.85</td><td align="left" 
valign="top">39.14</td><td align="left" valign="top">1321</td></tr><tr><td align="left" valign="top">W20</td><td align="left" valign="top">11.96</td><td align="left" valign="top">29.32</td><td align="left" valign="top">839</td></tr><tr><td align="left" valign="top">W21</td><td align="left" valign="top">11.36</td><td align="left" valign="top">34.43</td><td align="left" valign="top">4225</td></tr><tr><td align="left" valign="top">W22</td><td align="left" valign="top">11.11</td><td align="left" valign="top">34.62</td><td align="left" valign="top">2999</td></tr><tr><td align="left" valign="top">W23</td><td align="left" valign="top">11.93</td><td align="left" valign="top">29.02</td><td align="left" valign="top">114</td></tr><tr><td align="left" valign="top">W24</td><td align="left" valign="top">11.43</td><td align="left" valign="top">34.48</td><td align="left" valign="top">2192</td></tr><tr><td align="left" valign="top">W25</td><td align="left" valign="top">11.55</td><td align="left" valign="top">38.69</td><td align="left" valign="top">1058</td></tr><tr><td align="left" valign="top">W26</td><td align="left" valign="top">9.65</td><td align="left" valign="top">45.50</td><td align="left" valign="top">660</td></tr><tr><td align="left" valign="top">W27</td><td align="left" valign="top">10.93</td><td align="left" valign="top">38.60</td><td align="left" valign="top">425</td></tr><tr><td align="left" valign="top">W28</td><td align="left" valign="top">11.35</td><td align="left" valign="top">29.45</td><td align="left" valign="top">706</td></tr><tr><td align="left" valign="top">W29</td><td align="left" valign="top">11.27</td><td align="left" valign="top">27.70</td><td align="left" valign="top">648</td></tr><tr><td align="left" valign="top">W30</td><td align="left" valign="top">11.85</td><td align="left" valign="top">27.67</td><td align="left" valign="top">562</td></tr><tr><td align="left" valign="top">W31</td><td align="left" valign="top">9.27</td><td align="left" 
valign="top">46.62</td><td align="left" valign="top">1266</td></tr><tr><td align="left" valign="top">W32</td><td align="left" valign="top">9.17</td><td align="left" valign="top">46.23</td><td align="left" valign="top">2657</td></tr><tr><td align="left" valign="top">W33</td><td align="left" valign="top">10.33</td><td align="left" valign="top">43.09</td><td align="left" valign="top">1306</td></tr><tr><td align="left" valign="top">W34</td><td align="left" valign="top">11.50</td><td align="left" valign="top">35.65</td><td align="left" valign="top">760</td></tr><tr><td align="left" valign="top">W35</td><td align="left" valign="top">9.20</td><td align="left" valign="top">46.04</td><td align="left" valign="top">2672</td></tr><tr><td align="left" valign="top">W36</td><td align="left" valign="top">10.82</td><td align="left" valign="top">36.20</td><td align="left" valign="top">1472</td></tr><tr><td align="left" valign="top">W37</td><td align="left" valign="top">9.57</td><td align="left" valign="top">44.36</td><td align="left" valign="top">1370</td></tr><tr><td align="left" valign="top">W38</td><td align="left" valign="top">11.60</td><td align="left" valign="top">32.86</td><td align="left" valign="top">1173</td></tr><tr><td align="left" valign="top">W39</td><td align="left" valign="top">13.97</td><td align="left" valign="top">16.74</td><td align="left" valign="top">1343</td></tr><tr><td align="left" valign="top">W40</td><td align="left" valign="top">11.90</td><td align="left" valign="top">30.39</td><td align="left" valign="top">1948</td></tr><tr><td align="left" valign="top">W41</td><td align="left" valign="top">11.13</td><td align="left" valign="top">36.13</td><td align="left" valign="top">1678</td></tr><tr><td align="left" valign="top">W42</td><td align="left" valign="top">11.08</td><td align="left" valign="top">37.84</td><td align="left" valign="top">3960</td></tr><tr><td align="left" valign="top">W43</td><td align="left" valign="top">11.35</td><td align="left" 
valign="top">40.01</td><td align="left" valign="top">794</td></tr><tr><td align="left" valign="top">W44</td><td align="left" valign="top">10.97</td><td align="left" valign="top">37.84</td><td align="left" valign="top">2232</td></tr><tr><td align="left" valign="top">W45</td><td align="left" valign="top">11.87</td><td align="left" valign="top">30.18</td><td align="left" valign="top">1236</td></tr><tr><td align="left" valign="top">W46</td><td align="left" valign="top">13.36</td><td align="left" valign="top">21.97</td><td align="left" valign="top">1527</td></tr><tr><td align="left" valign="top">W47</td><td align="left" valign="top">12.49</td><td align="left" valign="top">27.99</td><td align="left" valign="top">2072</td></tr><tr><td align="left" valign="top">W48</td><td align="left" valign="top">13.66</td><td align="left" valign="top">24.65</td><td align="left" valign="top">2063</td></tr><tr><td align="left" valign="top">W49</td><td align="left" valign="top">12.01</td><td align="left" valign="top">32.93</td><td align="left" valign="top">1117</td></tr><tr><td align="left" valign="top">W50</td><td align="left" valign="top">13.86</td><td align="left" valign="top">22.18</td><td align="left" valign="top">1838</td></tr><tr><td align="left" valign="top">W51</td><td align="left" valign="top">11.62</td><td align="left" valign="top">37.14</td><td align="left" valign="top">762</td></tr><tr><td align="left" valign="top">W52</td><td align="left" valign="top">12.58</td><td align="left" valign="top">31.80</td><td align="left" valign="top">1642</td></tr><tr><td align="left" valign="top">W53</td><td align="left" valign="top">10.22</td><td align="left" valign="top">40.70</td><td align="left" valign="top">516</td></tr><tr><td align="left" valign="top">W54</td><td align="left" valign="top">14.28</td><td align="left" valign="top">19.45</td><td align="left" valign="top">1199</td></tr><tr><td align="left" valign="top">W55</td><td align="left" valign="top">13.88</td><td align="left" 
valign="top">22.92</td><td align="left" valign="top">1197</td></tr><tr><td align="left" valign="top">W56</td><td align="left" valign="top">12.69</td><td align="left" valign="top">30.66</td><td align="left" valign="top">3383</td></tr><tr><td align="left" valign="top">W57</td><td align="left" valign="top">12.02</td><td align="left" valign="top">32.44</td><td align="left" valign="top">2541</td></tr><tr><td align="left" valign="top">W58</td><td align="left" valign="top">11.41</td><td align="left" valign="top">39.02</td><td align="left" valign="top">746</td></tr><tr><td align="left" valign="top">W59</td><td align="left" valign="top">10.90</td><td align="left" valign="top">36.29</td><td align="left" valign="top">1530</td></tr><tr><td align="left" valign="top">W60</td><td align="left" valign="top">12.03</td><td align="left" valign="top">31.86</td><td align="left" valign="top">2411</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>WSTF: Wiener Sachtextformel (4th Vienna Formula).</p></fn><fn id="table2fn2"><p><sup>b</sup>WSTF mean 11.24 (SD 1.29).</p></fn><fn id="table2fn3"><p><sup>c</sup>FRE: Flesch reading ease.</p></fn><fn id="table2fn4"><p><sup>d</sup>FRE mean 35.92 (SD 7.64).</p></fn><fn id="table2fn5"><p><sup>e</sup>Number of words, mean 1409 (SD 840).</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Readability of the Rephrased Health Information Texts</title><p>Overall, the texts rephrased by the LLMs showed improved readability compared to the original texts. However, the degree of improvement varied greatly.</p><p>The mean WSTF scores were 9.96 (SD 1.52) for ChatGPT-3.5, 10.6 (SD 1.37) for ChatGPT-4o, 8.99 (SD 1.10) for Microsoft Copilot, and 11.7 (SD 1.47) for Le Chat. 
Microsoft Copilot achieved the best readability values compared to the other LLMs (see <xref ref-type="table" rid="table3">Table 3</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Computed readability scores and number of words with mean readability score and SDs, and average differences of original and large language model texts.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="3">ChatGPT-3.5</td><td align="left" valign="bottom" colspan="3">ChatGPT-4o</td><td align="left" valign="bottom" colspan="3">Microsoft Copilot</td><td align="left" valign="bottom" colspan="3">Le Chat</td></tr></thead><tbody><tr><td align="left" valign="top">Website</td><td align="left" valign="top">WSTF<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">FRE<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">Words</td><td align="left" valign="top">WSTF</td><td align="left" valign="top">FRE</td><td align="left" valign="top">Words</td><td align="left" valign="top">WSTF</td><td align="left" valign="top">FRE</td><td align="left" valign="top">Words</td><td align="left" valign="top">WSTF</td><td align="left" valign="top">FRE</td><td align="left" valign="top">Words</td></tr><tr><td align="left" valign="top">W1</td><td align="left" valign="top">9.81</td><td align="left" valign="top">46.17</td><td align="left" valign="top">242</td><td align="left" valign="top">10.25</td><td align="left" valign="top">39.87</td><td align="left" valign="top">496</td><td align="left" valign="top">8.35</td><td align="left" valign="top">51.72</td><td align="left" valign="top">845</td><td align="left" valign="top">10.13</td><td align="left" valign="top">39.35</td><td align="left" valign="top">446</td></tr><tr><td align="left" valign="top">W2</td><td align="left" valign="top">8.38</td><td align="left" 
valign="top">56.14</td><td align="left" valign="top">286</td><td align="left" valign="top">9.72</td><td align="left" valign="top">43.59</td><td align="left" valign="top">281</td><td align="left" valign="top">8.72</td><td align="left" valign="top">55.38</td><td align="left" valign="top">710</td><td align="left" valign="top">11.27</td><td align="left" valign="top">40.76</td><td align="left" valign="top">798</td></tr><tr><td align="left" valign="top">W3</td><td align="left" valign="top">10.85</td><td align="left" valign="top">41.58</td><td align="left" valign="top">305</td><td align="left" valign="top">11.36</td><td align="left" valign="top">34.35</td><td align="left" valign="top">501</td><td align="left" valign="top">8.60</td><td align="left" valign="top">53.71</td><td align="left" valign="top">817</td><td align="left" valign="top">12.56</td><td align="left" valign="top">34.88</td><td align="left" valign="top">471</td></tr><tr><td align="left" valign="top">W4</td><td align="left" valign="top">9.52</td><td align="left" valign="top">41.80</td><td align="left" valign="top">364</td><td align="left" valign="top">11.78</td><td align="left" valign="top">35.22</td><td align="left" valign="top">370</td><td align="left" valign="top"><italic>7.69<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></italic></td><td align="left" valign="top">54.29</td><td align="left" valign="top">610</td><td align="left" valign="top">11.15</td><td align="left" valign="top">36.23</td><td align="left" valign="top">456</td></tr><tr><td align="left" valign="top">W5</td><td align="left" valign="top">9.59</td><td align="left" valign="top">43.00</td><td align="left" valign="top">189</td><td align="left" valign="top">13.17</td><td align="left" valign="top">23.16</td><td align="left" valign="top">273</td><td align="left" valign="top">8.45</td><td align="left" valign="top">49.52</td><td align="left" valign="top">1541</td><td align="left" valign="top">11.84</td><td align="left" 
valign="top">31.16</td><td align="left" valign="top">914</td></tr><tr><td align="left" valign="top">W6</td><td align="left" valign="top">10.63</td><td align="left" valign="top">45.58</td><td align="left" valign="top">182</td><td align="left" valign="top">12.51</td><td align="left" valign="top">29.02</td><td align="left" valign="top">368</td><td align="left" valign="top">9.27</td><td align="left" valign="top">52.57</td><td align="left" valign="top">841</td><td align="left" valign="top">12.06</td><td align="left" valign="top">36.92</td><td align="left" valign="top">518</td></tr><tr><td align="left" valign="top">W7</td><td align="left" valign="top">8.60</td><td align="left" valign="top">47.11</td><td align="left" valign="top">310</td><td align="left" valign="top">11.97</td><td align="left" valign="top">31.22</td><td align="left" valign="top">565</td><td align="left" valign="top"><italic>6.78</italic></td><td align="left" valign="top"><italic>60.42</italic></td><td align="left" valign="top">746</td><td align="left" valign="top">8.60</td><td align="left" valign="top">51.02</td><td align="left" valign="top">540</td></tr><tr><td align="left" valign="top">W8</td><td align="left" valign="top">11.50</td><td align="left" valign="top">31.89</td><td align="left" valign="top">247</td><td align="left" valign="top">10.94</td><td align="left" valign="top">35.65</td><td align="left" valign="top">548</td><td align="left" valign="top">9.51</td><td align="left" valign="top">39.05</td><td align="left" valign="top">839</td><td align="left" valign="top">10.75</td><td align="left" valign="top">33.19</td><td align="left" valign="top">898</td></tr><tr><td align="left" valign="top">W9</td><td align="left" valign="top"><italic>7.01</italic></td><td align="left" valign="top">55.54</td><td align="left" valign="top">392</td><td align="left" valign="top">11.45</td><td align="left" valign="top">31.89</td><td align="left" valign="top">368</td><td align="left" 
valign="top"><italic>7.50</italic></td><td align="left" valign="top">54.58</td><td align="left" valign="top">905</td><td align="left" valign="top">9.09</td><td align="left" valign="top">46.91</td><td align="left" valign="top">884</td></tr><tr><td align="left" valign="top">W10</td><td align="left" valign="top">9.46</td><td align="left" valign="top">45.68</td><td align="left" valign="top">375</td><td align="left" valign="top">11.10</td><td align="left" valign="top">36.70</td><td align="left" valign="top">404</td><td align="left" valign="top">8.10</td><td align="left" valign="top">54.06</td><td align="left" valign="top">1272</td><td align="left" valign="top">12.59</td><td align="left" valign="top">26.46</td><td align="left" valign="top">502</td></tr><tr><td align="left" valign="top">W11</td><td align="left" valign="top">10.97</td><td align="left" valign="top">38.87</td><td align="left" valign="top">246</td><td align="left" valign="top">12.41</td><td align="left" valign="top">32.79</td><td align="left" valign="top">289</td><td align="left" valign="top"><italic>7.80</italic></td><td align="left" valign="top">57.70</td><td align="left" valign="top">711</td><td align="left" valign="top">13.62</td><td align="left" valign="top">25.75</td><td align="left" valign="top">359</td></tr><tr><td align="left" valign="top">W12</td><td align="left" valign="top">10.01</td><td align="left" valign="top">44.82</td><td align="left" valign="top">278</td><td align="left" valign="top">9.89</td><td align="left" valign="top">44.55</td><td align="left" valign="top">371</td><td align="left" valign="top">10.50</td><td align="left" valign="top">36.75</td><td align="left" valign="top">861</td><td align="left" valign="top">11.08</td><td align="left" valign="top">36.80</td><td align="left" valign="top">385</td></tr><tr><td align="left" valign="top">W13</td><td align="left" valign="top">11.04</td><td align="left" valign="top">39.63</td><td align="left" valign="top">281</td><td align="left" 
valign="top">9.93</td><td align="left" valign="top">42.85</td><td align="left" valign="top">316</td><td align="left" valign="top"><italic>7.49</italic></td><td align="left" valign="top"><italic>60.94</italic></td><td align="left" valign="top">529</td><td align="left" valign="top">12.85</td><td align="left" valign="top">27.51</td><td align="left" valign="top">588</td></tr><tr><td align="left" valign="top">W14</td><td align="left" valign="top">11.58</td><td align="left" valign="top">42.30</td><td align="left" valign="top">195</td><td align="left" valign="top">11.04</td><td align="left" valign="top">42.11</td><td align="left" valign="top">425</td><td align="left" valign="top">8.42</td><td align="left" valign="top">55.94</td><td align="left" valign="top">519</td><td align="left" valign="top">13.13</td><td align="left" valign="top">33.64</td><td align="left" valign="top">433</td></tr><tr><td align="left" valign="top">W15</td><td align="left" valign="top">11.98</td><td align="left" valign="top">38.45</td><td align="left" valign="top">422</td><td align="left" valign="top">11.51</td><td align="left" valign="top">30.80</td><td align="left" valign="top">335</td><td align="left" valign="top">11.16</td><td align="left" valign="top">42.22</td><td align="left" valign="top">1107</td><td align="left" valign="top">13.55</td><td align="left" valign="top">26.82</td><td align="left" valign="top">476</td></tr><tr><td align="left" valign="top">W16</td><td align="left" valign="top"><italic>7.90</italic></td><td align="left" valign="top">56.36</td><td align="left" valign="top">240</td><td align="left" valign="top">10.24</td><td align="left" valign="top">43.86</td><td align="left" valign="top">403</td><td align="left" valign="top"><italic>7.58</italic></td><td align="left" valign="top">54.60</td><td align="left" valign="top">518</td><td align="left" valign="top">9.74</td><td align="left" valign="top">47.35</td><td align="left" valign="top">304</td></tr><tr><td align="left" 
valign="top">W17</td><td align="left" valign="top">10.79</td><td align="left" valign="top">40.32</td><td align="left" valign="top">244</td><td align="left" valign="top">13.18</td><td align="left" valign="top">28.21</td><td align="left" valign="top">414</td><td align="left" valign="top"><italic>7.48</italic></td><td align="left" valign="top">53.50</td><td align="left" valign="top">462</td><td align="left" valign="top">10.43</td><td align="left" valign="top">41.20</td><td align="left" valign="top">425</td></tr><tr><td align="left" valign="top">W18</td><td align="left" valign="top"><italic>6.86</italic></td><td align="left" valign="top"><italic>60.56</italic></td><td align="left" valign="top">328</td><td align="left" valign="top">12.98</td><td align="left" valign="top">29.82</td><td align="left" valign="top">425</td><td align="left" valign="top">8.93</td><td align="left" valign="top">52.24</td><td align="left" valign="top">670</td><td align="left" valign="top">11.42</td><td align="left" valign="top">40.27</td><td align="left" valign="top">694</td></tr><tr><td align="left" valign="top">W19</td><td align="left" valign="top">9.74</td><td align="left" valign="top">46.16</td><td align="left" valign="top">402</td><td align="left" valign="top">11.32</td><td align="left" valign="top">34.63</td><td align="left" valign="top">357</td><td align="left" valign="top">10.18</td><td align="left" valign="top">45.15</td><td align="left" valign="top">987</td><td align="left" valign="top">10.99</td><td align="left" valign="top">39.99</td><td align="left" valign="top">381</td></tr><tr><td align="left" valign="top">W20</td><td align="left" valign="top">10.15</td><td align="left" valign="top">34.17</td><td align="left" valign="top">179</td><td align="left" valign="top">10.23</td><td align="left" valign="top">41.84</td><td align="left" valign="top">371</td><td align="left" valign="top">9.99</td><td align="left" valign="top">41.26</td><td align="left" valign="top">658</td><td align="left" 
valign="top">11.93</td><td align="left" valign="top">26.95</td><td align="left" valign="top">483</td></tr><tr><td align="left" valign="top">W21</td><td align="left" valign="top">8.64</td><td align="left" valign="top">50.26</td><td align="left" valign="top">569</td><td align="left" valign="top">11.10</td><td align="left" valign="top">34.63</td><td align="left" valign="top">501</td><td align="left" valign="top">11.10</td><td align="left" valign="top">36.22</td><td align="left" valign="top">5170</td><td align="left" valign="top">11.67</td><td align="left" valign="top">33.53</td><td align="left" valign="top">635</td></tr><tr><td align="left" valign="top">W22</td><td align="left" valign="top">10.36</td><td align="left" valign="top">43.12</td><td align="left" valign="top">207</td><td align="left" valign="top">9.90</td><td align="left" valign="top">45.81</td><td align="left" valign="top">485</td><td align="left" valign="top">9.47</td><td align="left" valign="top">44.02</td><td align="left" valign="top">2140</td><td align="left" valign="top">11.67</td><td align="left" valign="top">30.06</td><td align="left" valign="top">358</td></tr><tr><td align="left" valign="top">W23</td><td align="left" valign="top">9.64</td><td align="left" valign="top">43.23</td><td align="left" valign="top">113</td><td align="left" valign="top">10.29</td><td align="left" valign="top">41.88</td><td align="left" valign="top">522</td><td align="left" valign="top">9.31</td><td align="left" valign="top">44.30</td><td align="left" valign="top">171</td><td align="left" valign="top">10.31</td><td align="left" valign="top">36.48</td><td align="left" valign="top">140</td></tr><tr><td align="left" valign="top">W24</td><td align="left" valign="top">13.17</td><td align="left" valign="top">21.91</td><td align="left" valign="top">614</td><td align="left" valign="top">10.28</td><td align="left" valign="top">40.50</td><td align="left" valign="top">527</td><td align="left" valign="top">10.27</td><td align="left" 
valign="top">40.57</td><td align="left" valign="top">1619</td><td align="left" valign="top">13.51</td><td align="left" valign="top">19.93</td><td align="left" valign="top">603</td></tr><tr><td align="left" valign="top">W25</td><td align="left" valign="top">10.07</td><td align="left" valign="top">50.33</td><td align="left" valign="top">304</td><td align="left" valign="top">9.87</td><td align="left" valign="top">43.50</td><td align="left" valign="top">488</td><td align="left" valign="top">8.97</td><td align="left" valign="top">51.33</td><td align="left" valign="top">633</td><td align="left" valign="top">11.07</td><td align="left" valign="top">41.37</td><td align="left" valign="top">408</td></tr><tr><td align="left" valign="top">W26</td><td align="left" valign="top"><italic>7.98</italic></td><td align="left" valign="top">56.19</td><td align="left" valign="top">225</td><td align="left" valign="top"><italic>7.86</italic></td><td align="left" valign="top">50.00</td><td align="left" valign="top">298</td><td align="left" valign="top"><italic>7.68</italic></td><td align="left" valign="top">57.58</td><td align="left" valign="top">460</td><td align="left" valign="top">10.26</td><td align="left" valign="top">45.04</td><td align="left" valign="top">443</td></tr><tr><td align="left" valign="top">W27</td><td align="left" valign="top">8.76</td><td align="left" valign="top">49.90</td><td align="left" valign="top">336</td><td align="left" valign="top">8.84</td><td align="left" valign="top">41.82</td><td align="left" valign="top">342</td><td align="left" valign="top"><italic>7.04</italic></td><td align="left" valign="top">58.11</td><td align="left" valign="top">388</td><td align="left" valign="top">9.66</td><td align="left" valign="top">45.03</td><td align="left" valign="top">370</td></tr><tr><td align="left" valign="top">W28</td><td align="left" valign="top">11.32</td><td align="left" valign="top">31.90</td><td align="left" valign="top">268</td><td align="left" 
valign="top">11.32</td><td align="left" valign="top">31.76</td><td align="left" valign="top">562</td><td align="left" valign="top">9.58</td><td align="left" valign="top">43.31</td><td align="left" valign="top">515</td><td align="left" valign="top">12.69</td><td align="left" valign="top">25.02</td><td align="left" valign="top">350</td></tr><tr><td align="left" valign="top">W29</td><td align="left" valign="top">12.08</td><td align="left" valign="top">27.99</td><td align="left" valign="top">211</td><td align="left" valign="top">11.28</td><td align="left" valign="top">36.53</td><td align="left" valign="top">462</td><td align="left" valign="top">9.35</td><td align="left" valign="top">44.20</td><td align="left" valign="top">456</td><td align="left" valign="top">12.55</td><td align="left" valign="top">22.17</td><td align="left" valign="top">356</td></tr><tr><td align="left" valign="top">W30</td><td align="left" valign="top">11.43</td><td align="left" valign="top">27.47</td><td align="left" valign="top">278</td><td align="left" valign="top">8.09</td><td align="left" valign="top">56.10</td><td align="left" valign="top">467</td><td align="left" valign="top">11.04</td><td align="left" valign="top">34.28</td><td align="left" valign="top">392</td><td align="left" valign="top">11.24</td><td align="left" valign="top">27.05</td><td align="left" valign="top">328</td></tr><tr><td align="left" valign="top">W31</td><td align="left" valign="top">10.45</td><td align="left" valign="top">39.31</td><td align="left" valign="top">305</td><td align="left" valign="top">8.79</td><td align="left" valign="top">46.06</td><td align="left" valign="top">446</td><td align="left" valign="top">8.29</td><td align="left" valign="top">53.77</td><td align="left" valign="top">1067</td><td align="left" valign="top">11.11</td><td align="left" valign="top">36.72</td><td align="left" valign="top">406</td></tr><tr><td align="left" valign="top">W32</td><td align="left" valign="top"><italic>7.94</italic></td><td 
align="left" valign="top">55.03</td><td align="left" valign="top">248</td><td align="left" valign="top">9.19</td><td align="left" valign="top">40.50</td><td align="left" valign="top">694</td><td align="left" valign="top">8.13</td><td align="left" valign="top">52.45</td><td align="left" valign="top">1486</td><td align="left" valign="top">9.70</td><td align="left" valign="top">43.85</td><td align="left" valign="top">1543</td></tr><tr><td align="left" valign="top">W33</td><td align="left" valign="top">8.64</td><td align="left" valign="top">50.99</td><td align="left" valign="top">307</td><td align="left" valign="top">10.71</td><td align="left" valign="top">44.53</td><td align="left" valign="top">366</td><td align="left" valign="top"><italic>7.93</italic></td><td align="left" valign="top">53.66</td><td align="left" valign="top">775</td><td align="left" valign="top">12.79</td><td align="left" valign="top">35.38</td><td align="left" valign="top">344</td></tr><tr><td align="left" valign="top">W34</td><td align="left" valign="top">9.20</td><td align="left" valign="top">48.86</td><td align="left" valign="top">202</td><td align="left" valign="top">10.06</td><td align="left" valign="top">46.79</td><td align="left" valign="top">455</td><td align="left" valign="top">9.45</td><td align="left" valign="top">49.53</td><td align="left" valign="top">743</td><td align="left" valign="top">13.17</td><td align="left" valign="top">36.34</td><td align="left" valign="top">199</td></tr><tr><td align="left" valign="top">W35</td><td align="left" valign="top">8.45</td><td align="left" valign="top">49.27</td><td align="left" valign="top">191</td><td align="left" valign="top">9.99</td><td align="left" valign="top">39.92</td><td align="left" valign="top">472</td><td align="left" valign="top">8.20</td><td align="left" valign="top">51.35</td><td align="left" valign="top">2175</td><td align="left" valign="top">11.12</td><td align="left" valign="top">37.73</td><td align="left" 
valign="top">1812</td></tr><tr><td align="left" valign="top">W36</td><td align="left" valign="top"><italic>7.45</italic></td><td align="left" valign="top">58.25</td><td align="left" valign="top">266</td><td align="left" valign="top"><italic>6.69</italic></td><td align="left" valign="top">59.17</td><td align="left" valign="top">388</td><td align="left" valign="top">8.41</td><td align="left" valign="top">50.82</td><td align="left" valign="top">899</td><td align="left" valign="top">9.77</td><td align="left" valign="top">43.78</td><td align="left" valign="top">351</td></tr><tr><td align="left" valign="top">W37</td><td align="left" valign="top">10.27</td><td align="left" valign="top">46.75</td><td align="left" valign="top">222</td><td align="left" valign="top">9.91</td><td align="left" valign="top">41.15</td><td align="left" valign="top">501</td><td align="left" valign="top">8.04</td><td align="left" valign="top">54.91</td><td align="left" valign="top">1002</td><td align="left" valign="top">11.28</td><td align="left" valign="top">39.32</td><td align="left" valign="top">621</td></tr><tr><td align="left" valign="top">W38</td><td align="left" valign="top">11.02</td><td align="left" valign="top">35.77</td><td align="left" valign="top">207</td><td align="left" valign="top">11.59</td><td align="left" valign="top">36.28</td><td align="left" valign="top">584</td><td align="left" valign="top">10.02</td><td align="left" valign="top">42.72</td><td align="left" valign="top">797</td><td align="left" valign="top">12.54</td><td align="left" valign="top">27.26</td><td align="left" valign="top">343</td></tr><tr><td align="left" valign="top">W39</td><td align="left" valign="top">11.44</td><td align="left" valign="top">38.79</td><td align="left" valign="top">269</td><td align="left" valign="top"><italic>7.86</italic></td><td align="left" valign="top">54.24</td><td align="left" valign="top">550</td><td align="left" valign="top">9.84</td><td align="left" valign="top">45.85</td><td 
align="left" valign="top">805</td><td align="left" valign="top">13.95</td><td align="left" valign="top">19.90</td><td align="left" valign="top">331</td></tr><tr><td align="left" valign="top">W40</td><td align="left" valign="top">10.62</td><td align="left" valign="top">35.43</td><td align="left" valign="top">332</td><td align="left" valign="top">9.58</td><td align="left" valign="top">42.17</td><td align="left" valign="top">409</td><td align="left" valign="top">9.71</td><td align="left" valign="top">40.90</td><td align="left" valign="top">1104</td><td align="left" valign="top">14.88</td><td align="left" valign="top">11.25</td><td align="left" valign="top">221</td></tr><tr><td align="left" valign="top">W41</td><td align="left" valign="top">10.51</td><td align="left" valign="top">44.05</td><td align="left" valign="top">266</td><td align="left" valign="top"><italic>7.62</italic></td><td align="left" valign="top">54.26</td><td align="left" valign="top">356</td><td align="left" valign="top">9.32</td><td align="left" valign="top">48.02</td><td align="left" valign="top">954</td><td align="left" valign="top">11.10</td><td align="left" valign="top">34.85</td><td align="left" valign="top">1022</td></tr><tr><td align="left" valign="top">W42</td><td align="left" valign="top">11.07</td><td align="left" valign="top">44.53</td><td align="left" valign="top">458</td><td align="left" valign="top">9.35</td><td align="left" valign="top">45.18</td><td align="left" valign="top">155</td><td align="left" valign="top">9.04</td><td align="left" valign="top">53.37</td><td align="left" valign="top">2291</td><td align="left" valign="top">11.35</td><td align="left" valign="top">37.16</td><td align="left" valign="top">2792</td></tr><tr><td align="left" valign="top">W43</td><td align="left" valign="top">10.61</td><td align="left" valign="top">43.49</td><td align="left" valign="top">353</td><td align="left" valign="top"><italic>7.74</italic></td><td align="left" valign="top">55.85</td><td 
align="left" valign="top">335</td><td align="left" valign="top">8.78</td><td align="left" valign="top">53.93</td><td align="left" valign="top">625</td><td align="left" valign="top">12.40</td><td align="left" valign="top">34.65</td><td align="left" valign="top">378</td></tr><tr><td align="left" valign="top">W44</td><td align="left" valign="top"><italic>7.92</italic></td><td align="left" valign="top">55.98</td><td align="left" valign="top">222</td><td align="left" valign="top">9.62</td><td align="left" valign="top">39.96</td><td align="left" valign="top">339</td><td align="left" valign="top">8.09</td><td align="left" valign="top">53.08</td><td align="left" valign="top">1405</td><td align="left" valign="top">14.43</td><td align="left" valign="top">16.29</td><td align="left" valign="top">365</td></tr><tr><td align="left" valign="top">W45</td><td align="left" valign="top">8.71</td><td align="left" valign="top">48.01</td><td align="left" valign="top">259</td><td align="left" valign="top">10.31</td><td align="left" valign="top">36.58</td><td align="left" valign="top">391</td><td align="left" valign="top">9.02</td><td align="left" valign="top">47.50</td><td align="left" valign="top">749</td><td align="left" valign="top">9.19</td><td align="left" valign="top">46.47</td><td align="left" valign="top">677</td></tr><tr><td align="left" valign="top">W46</td><td align="left" valign="top">10.94</td><td align="left" valign="top">29.05</td><td align="left" valign="top">214</td><td align="left" valign="top">10.72</td><td align="left" valign="top">35.64</td><td align="left" valign="top">350</td><td align="left" valign="top">11.52</td><td align="left" valign="top">37.48</td><td align="left" valign="top">1260</td><td align="left" valign="top">13.62</td><td align="left" valign="top">20.52</td><td align="left" valign="top">610</td></tr><tr><td align="left" valign="top">W47</td><td align="left" valign="top">9.82</td><td align="left" valign="top">47.68</td><td align="left" 
valign="top">314</td><td align="left" valign="top">11.44</td><td align="left" valign="top">33.51</td><td align="left" valign="top">436</td><td align="left" valign="top">8.46</td><td align="left" valign="top">52.97</td><td align="left" valign="top">1454</td><td align="left" valign="top">12.09</td><td align="left" valign="top">34.95</td><td align="left" valign="top">428</td></tr><tr><td align="left" valign="top">W48</td><td align="left" valign="top">9.44</td><td align="left" valign="top">42.08</td><td align="left" valign="top">240</td><td align="left" valign="top">10.15</td><td align="left" valign="top">42.42</td><td align="left" valign="top">385</td><td align="left" valign="top">9.34</td><td align="left" valign="top">50.15</td><td align="left" valign="top">1454</td><td align="left" valign="top">13.98</td><td align="left" valign="top">19.24</td><td align="left" valign="top">485</td></tr><tr><td align="left" valign="top">W49</td><td align="left" valign="top"><italic>6.34</italic></td><td align="left" valign="top">61.81</td><td align="left" valign="top">210</td><td align="left" valign="top">9.54</td><td align="left" valign="top">51.02</td><td align="left" valign="top">450</td><td align="left" valign="top">9.28</td><td align="left" valign="top">45.54</td><td align="left" valign="top">744</td><td align="left" valign="top">11.16</td><td align="left" valign="top">35.22</td><td align="left" valign="top">662</td></tr><tr><td align="left" valign="top">W50</td><td align="left" valign="top">11.44</td><td align="left" valign="top">36.95</td><td align="left" valign="top">180</td><td align="left" valign="top">9.94</td><td align="left" valign="top">42.40</td><td align="left" valign="top">604</td><td align="left" valign="top">10.54</td><td align="left" valign="top">39.41</td><td align="left" valign="top">1294</td><td align="left" valign="top">11.28</td><td align="left" valign="top">36.13</td><td align="left" valign="top">474</td></tr><tr><td align="left" valign="top">W51</td><td 
align="left" valign="top">10.76</td><td align="left" valign="top">39.57</td><td align="left" valign="top">239</td><td align="left" valign="top">9.73</td><td align="left" valign="top">46.80</td><td align="left" valign="top">442</td><td align="left" valign="top">9.82</td><td align="left" valign="top">45.89</td><td align="left" valign="top">564</td><td align="left" valign="top">11.28</td><td align="left" valign="top">37.34</td><td align="left" valign="top">540</td></tr><tr><td align="left" valign="top">W52</td><td align="left" valign="top">8.66</td><td align="left" valign="top">52.05</td><td align="left" valign="top">253</td><td align="left" valign="top">11.07</td><td align="left" valign="top">39.83</td><td align="left" valign="top">418</td><td align="left" valign="top">10.68</td><td align="left" valign="top">40.98</td><td align="left" valign="top">110</td><td align="left" valign="top">14.48</td><td align="left" valign="top">24.01</td><td align="left" valign="top">387</td></tr><tr><td align="left" valign="top">W53</td><td align="left" valign="top"><italic>6.58</italic></td><td align="left" valign="top">57.13</td><td align="left" valign="top">166</td><td align="left" valign="top">10.09</td><td align="left" valign="top">42.20</td><td align="left" valign="top">380</td><td align="left" valign="top"><italic>7.69</italic></td><td align="left" valign="top">55.60</td><td align="left" valign="top">334</td><td align="left" valign="top">9.68</td><td align="left" valign="top">43.02</td><td align="left" valign="top">287</td></tr><tr><td align="left" valign="top">W54</td><td align="left" valign="top">9.87</td><td align="left" valign="top">47.54</td><td align="left" valign="top">224</td><td align="left" valign="top">8.67</td><td align="left" valign="top">51.23</td><td align="left" valign="top">333</td><td align="left" valign="top">9.65</td><td align="left" valign="top">46.25</td><td align="left" valign="top">2380</td><td align="left" valign="top">12.66</td><td align="left" 
valign="top">29.66</td><td align="left" valign="top">494</td></tr><tr><td align="left" valign="top">W55</td><td align="left" valign="top"><italic>7.92</italic></td><td align="left" valign="top">52.57</td><td align="left" valign="top">261</td><td align="left" valign="top">9.88</td><td align="left" valign="top">36.97</td><td align="left" valign="top">382</td><td align="left" valign="top">9.84</td><td align="left" valign="top">45.22</td><td align="left" valign="top">867</td><td align="left" valign="top">14.37</td><td align="left" valign="top">18.19</td><td align="left" valign="top">582</td></tr><tr><td align="left" valign="top">W56</td><td align="left" valign="top">11.05</td><td align="left" valign="top">43.55</td><td align="left" valign="top">261</td><td align="left" valign="top">10.36</td><td align="left" valign="top">44.53</td><td align="left" valign="top">414</td><td align="left" valign="top">9.46</td><td align="left" valign="top">46.08</td><td align="left" valign="top">1770</td><td align="left" valign="top">11.45</td><td align="left" valign="top">38.48</td><td align="left" valign="top">797</td></tr><tr><td align="left" valign="top">W57</td><td align="left" valign="top">8.32</td><td align="left" valign="top">50.94</td><td align="left" valign="top">304</td><td align="left" valign="top">10.47</td><td align="left" valign="top">39.10</td><td align="left" valign="top">402</td><td align="left" valign="top">8.74</td><td align="left" valign="top">51.63</td><td align="left" valign="top">899</td><td align="left" valign="top">10.95</td><td align="left" valign="top">31.65</td><td align="left" valign="top">291</td></tr><tr><td align="left" valign="top">W58</td><td align="left" valign="top"><italic>7.89</italic></td><td align="left" valign="top">56.28</td><td align="left" valign="top">340</td><td align="left" valign="top">10.70</td><td align="left" valign="top">41.63</td><td align="left" valign="top">491</td><td align="left" valign="top">8.01</td><td align="left" 
valign="top">58.11</td><td align="left" valign="top">566</td><td align="left" valign="top">10.83</td><td align="left" valign="top">40.04</td><td align="left" valign="top">348</td></tr><tr><td align="left" valign="top">W59</td><td align="left" valign="top">8.08</td><td align="left" valign="top">47.49</td><td align="left" valign="top">277</td><td align="left" valign="top">11.75</td><td align="left" valign="top">36.88</td><td align="left" valign="top">539</td><td align="left" valign="top">8.29</td><td align="left" valign="top">48.90</td><td align="left" valign="top">1085</td><td align="left" valign="top">10.81</td><td align="left" valign="top">33.20</td><td align="left" valign="top">733</td></tr><tr><td align="left" valign="top">W60</td><td align="left" valign="top">10.33</td><td align="left" valign="top">44.92</td><td align="left" valign="top">294</td><td align="left" valign="top">9.22</td><td align="left" valign="top">49.62</td><td align="left" valign="top">388</td><td align="left" valign="top">9.76</td><td align="left" valign="top">44.55</td><td align="left" valign="top">1659</td><td align="left" valign="top">11.69</td><td align="left" valign="top">31.57</td><td align="left" valign="top">742</td></tr><tr><td align="left" valign="top">Mean (SD)</td><td align="left" valign="top">9.96 (1.52)</td><td align="left" valign="top">45.04 (8.62)</td><td align="left" valign="top">278 (88)</td><td align="left" valign="top">10.60 (1.37)</td><td align="left" valign="top">39.23 (7.45)</td><td align="left" valign="top">749 (94)</td><td align="left" valign="top">8.99 (1.10)</td><td align="left" valign="top">49.00 (6.51)</td><td align="left" valign="top">1040 (743)</td><td align="left" valign="top">11.71 (1.47)</td><td align="left" valign="top">33.72 (8.58)</td><td align="left" valign="top">570 (406)</td></tr><tr><td align="left" valign="top">DIFF<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup> (DIFF_SD<sup><xref ref-type="table-fn" 
rid="table3fn5">e</xref></sup>)</td><td align="left" valign="top">1.54 (1.68)</td><td align="left" valign="top">9.13 (8.90)</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td><td align="left" valign="top">0.93 (2.06)</td><td align="left" valign="top">4.94 (11.78)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">2.24 (0.98)</td><td align="left" valign="top">13.09 (5.88)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2212;0.47 (1.33)</td><td align="left" valign="top">&#x2212;2.20 (7.15)</td><td align="left" valign="top">&#x2014;</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>WSTF: Wiener Sachtextformel (4th Vienna Formula).</p></fn><fn id="table3fn2"><p><sup>b</sup>FRE: Flesch reading ease.</p></fn><fn id="table3fn3"><p><sup>c</sup>Italic font denotes that the target readability (WSTF&#x2264;8, FRE&#x2265;60) was reached.</p></fn><fn id="table3fn4"><p><sup>d</sup>DIFF: difference.</p></fn><fn id="table3fn5"><p><sup>e</sup>DIFF_SD: SD difference.</p></fn><fn id="table3fn6"><p><sup>f</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><p>Microsoft Copilot achieved the highest average score of 49.0 (SD 6.51) on the readability metric FRE, while Le Chat came last with 33.72 (SD 8.58). 
ChatGPT-3.5 generated texts with, on average, the fewest words (278, SD 88 words), while Microsoft Copilot generated texts with the most words (1040, SD 743 words) but still fewer than the original texts.</p><p>The ChatGPT-based models (ChatGPT-3.5, ChatGPT-4o, and Microsoft Copilot) achieved an average improvement of 1.54 (SD 1.68), 0.93 (SD 2.06), and 2.24 (SD 0.98) grade levels, respectively, for the WSTF.</p><p>ChatGPT-3.5 reached the desired grade level of eighth grade for 20 texts; Microsoft Copilot reached this level for half of the texts (see <xref ref-type="table" rid="table3">Table 3</xref> and <xref ref-type="fig" rid="figure1">Figure 1</xref>). Notably, the newer ChatGPT-4o achieved this for only 5 texts.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Distribution of calculated WSTF scores for GPT-3.5, GPT-4o, Microsoft Copilot, and Le Chat. The fifth column shows the distribution of the readability scores of the original texts. The dashed line indicates the recommended readability score of 8. WSTF: Wiener Sachtextformel (4th Vienna Formula).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e77149_fig01.png"/></fig><p>Le Chat did not reach the eighth grade (or lower) for any text. By contrast, the average difference of &#x2212;0.47 indicates that this LLM tends to decrease the readability. This was also reflected in the statistical tests. 
For both the WSTF and FRE metrics, the hypotheses that the mean readability improved (<inline-formula><mml:math id="ieqn1"><mml:msub><mml:mrow><mml:mi>H</mml:mi></mml:mrow><mml:mrow><mml:mi>W</mml:mi><mml:mi>S</mml:mi><mml:mi>T</mml:mi><mml:mi>F</mml:mi><mml:mo>|</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn2"><mml:msub><mml:mrow><mml:mi>H</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi><mml:mi>R</mml:mi><mml:mi>E</mml:mi><mml:mo>|</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula>) could not be accepted.</p><p>The FRE scores of the rephrased texts improved for GPT-3.5, GPT-4o, and Microsoft Copilot by 9.13, 4.94, and 13.09, respectively (see <xref ref-type="table" rid="table3">Table 3</xref> and <xref ref-type="fig" rid="figure2">Figure 2</xref>). However, the readability of most of the texts was still low, that is, scores below 60.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Distribution of calculated FRE scores for GPT-3.5, GPT-4o, Microsoft Copilot, and Le Chat. The fifth column shows the distribution of the readability scores of the original texts. The dashed line indicates the recommended readability score of 60. FRE: Flesch reading ease.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e77149_fig02.png"/></fig><p>On average, Le Chat&#x2019;s texts scored 2.2 points lower on the FRE than the original texts, in line with the evaluation of the WSTF metric.</p><p>The findings described above are also reflected in the results of the statistical tests: None of the tests for an improvement to the eighth grade level yielded a significant result, that is, the alternative hypotheses could not be accepted. However, except for the Le Chat model, it could be shown that the mean readability was improved significantly, that is, the alternative hypotheses could be accepted. 
In a nutshell, three out of four LLMs achieved a statistically significant readability improvement, yet it was not high enough to have reached the eighth grade level.</p></sec><sec id="s3-3"><title>Accuracy of the Rephrased Health Information Texts</title><p>All LLM answers were screened independently by 3 reviewers. Fleiss &#x03BA; was 0.264, and the percent agreement was 54.6%. This relatively low agreement reflects the difficulty of evaluating medical content without deep domain-level expertise; ideally, assessments would involve medical doctors, and the reliability of the evaluation is further complicated by uncertainty regarding the correctness of the original websites.</p><p>Although not a systematic assessment, several obvious mistakes and LLM hallucinations were discovered: Microsoft Copilot shortened the information about endometrial cancer (W29) into &#x201C;endometrial cancer is the most common cancer among women in Germany&#x201D; (all the following examples are translated versions of the original German health information texts and rephrased LLM answers). From an epidemiological perspective, this claim is incorrect, with breast cancer being the most prevalent type of cancer among women, constituting a nonnegligible change of meaning in the rephrased text.</p><p>The original text about myocarditis (W49) included the sentence: &#x201C;Myocarditis is also considered to be an important cause of sudden cardiac death in athletes,&#x201D; which is difficult to understand for readers and may lead to misinterpretations. This kind of sudden cardiac death occurs during exercise, training, or during a match. 
This information is not given in the sentence and is only implied by the word &#x201C;athlete.&#x201D; The rephrased sentence also carries this ambiguity and even increases it: &#x201C;When athletes suddenly die, it is often due to inflammation of the heart muscle.&#x201D; The &#x2018;context&#x2019; of sudden death is omitted.</p><p>Missing context was also noticed when verbatim speech and statements by medical experts were included in the original texts. The selected LLMs reduced these statements into plain text, thereby omitting the source of the information. For example, the article about myocarditis (W49) included an expert statement as follows: &#x201C;You should always go to the doctor if you notice symptoms that you are not aware of, says Dr. Milan Dinic, a cardiologist in private practice from Munich.&#x201D; &#x201C;Particularly in women, any new complaint between the tip of the nose and the navel is usually heart related. You should therefore definitely think about your heart.&#x201D;</p><p>ChatGPT-3.5 rephrased this to &#x201C;You should always see a doctor if you notice any new symptoms. In women in particular, many symptoms can indicate heart disease.&#x201D;</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>The original medical texts extracted from health information websites are, on average, <italic>difficult</italic> (for the FRE metric) or <italic>fairly difficult</italic> (for the WSTF) to read. This means that the original texts use complicated sentence structures and/or complex specialist terminology. Our study showed that LLMs can help improve the readability, especially for the models ChatGPT-3.5 and Microsoft Copilot.</p><p>ChatGPT-3.5 and Microsoft Copilot were able to reduce text. However, the accuracy of the content must be checked by medical experts to make sure that no ambiguous or false statements were introduced. 
It is well known that LLMs tend to hallucinate [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref64">64</xref>] or &#x201C;escalate the minor biases that could occur in the data bank with which it gets trained&#x201D; [<xref ref-type="bibr" rid="ref35">35</xref>]. Nevertheless, the authors postulate that the process of &#x201C;fact-checking&#x201D; an automatically generated text is more time efficient than manually rewriting medical texts for laypersons. Specialized LLMs or LLMs fine-tuned for medical texts could also be a possible solution to increase the correctness and reliability of generated outputs [<xref ref-type="bibr" rid="ref35">35</xref>] and thus make this text generation process even more time- and cost-efficient.</p><p>The authors found that LLMs moderately increased readability. This is in line with the research by Li [<xref ref-type="bibr" rid="ref32">32</xref>]. For radiology reports, ChatGPT 3 produced texts that improved the FRE by 45.5 points.</p><p>In our analyses, the FRE improvements were 9.13 (ChatGPT-3.5), 4.94 (ChatGPT-4o), and 13.09 (Microsoft Copilot). This might indicate that the rephrasing of texts works better for texts originally written in English. In addition, Srinivasan et al [<xref ref-type="bibr" rid="ref34">34</xref>] report FRE improvements in a similar range for GPT-3.5 (16.07) and for GPT-4o (5.4).</p></sec><sec id="s4-2"><title>Limitations</title><p>As the aim of the study was to reflect the experience of an average layperson seeking health information online, no advanced prompt optimization techniques were investigated. While more robust prompts might yield different results, the authors consider it unlikely that nonexpert users would engage in systematic prompt tuning. In addition, reproducibility is hindered by the fact that laypersons will not experiment with LLM model parameters such as <italic>temperature</italic>. 
Moreover, tuning model parameters over the chat interfaces is not possible in all cases and requires API access. In this context, the authors assume that a high fraction of laypersons do not have the necessary technical background to experiment with LLM APIs and related programming languages.</p><p>Additionally, the exact model versions of the LLMs used in this study are no longer publicly available. Hence, as in most LLM-based studies, both the selected LLMs and the examined website texts are snapshots in time. The LLM field is evolving rapidly, and reproducibility of the results is difficult.</p><p>Another aspect is that the texts taken from the websites may also change over time. The appearance and formatting of the individual web pages were deliberately not considered in this work: Only raw text material was extracted. However, aesthetic and design features or educational multimedia can influence the understandability of information material.</p><p>No dedicated <italic>German</italic> LLM was used in this study. It would be interesting to replicate this study with a fine-tuned German LLM. In 2024, the LLM community has a strong focus on English training data and models, and the performance is lower for other languages [<xref ref-type="bibr" rid="ref36">36</xref>]. Heilmeyer et al [<xref ref-type="bibr" rid="ref18">18</xref>] noted that specific, on-premise trained German models performed better. However, typical patients or citizens seeking health information will neither have the technical skills or knowledge nor the specialized hardware available to do this on their own.</p><p>The systematic evaluation of the (medical) accuracy of rephrased PEMs was beyond the study&#x2019;s scope, but future interdisciplinary research involving medical experts could address this. Moreover, a follow-up study could more deeply investigate the readability and correctness from a technical point of view by using APIs instead of relying on publicly available chat interfaces. 
In this context, more recent LLMs could be benchmarked with the same quality-controlled set of text material in an end-to-end evaluation pipeline.</p></sec><sec id="s4-3"><title>Comparison With Prior Work</title><p>If LLMs were used to answer patient-centric questions about hip arthroplasty, Mika et al [<xref ref-type="bibr" rid="ref31">31</xref>] report that patients would be able to understand them. However, they do not calculate a readability metric for the given answers and instead rely on a &#x201C;Response Rating System.&#x201D; In contrast, the results of Eng et al [<xref ref-type="bibr" rid="ref29">29</xref>] confirm the low readability of answers for patient-centric questions.</p><p>Compared to the prior works [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref65">65</xref>], this study covered a broader spectrum of medical domains: cancer, cardiovascular conditions, public health topics, etc.</p><p>Similar improvements in terms of readability were found by Ovelman et al [<xref ref-type="bibr" rid="ref66">66</xref>]: They used the Claude 2 LLM to create plain language summaries of 10 evidence reviews. The covered topics range from vaccines, prehospital airway management, and malnutrition in hospitalized adults to breast irradiation for breast cancer. For half of their texts, the recommended sixth to eighth grade reading level was met by the generated summaries.</p><p>Lyu et al [<xref ref-type="bibr" rid="ref65">65</xref>] did not measure the quality of the rephrased reports with readability scores but let them be evaluated by experts. 
In addition, they found that the effect of prompt engineering was not considered high: &#x201C;All of the five further-modified prompts were found to produce results similar to those of the original prompt and far worse than those of the optimized prompt.&#x201D;</p><p>This study differs from the previously presented evaluations. Here, only German health information texts were rephrased by LLMs and their readability evaluated.</p></sec><sec id="s4-4"><title>Innovation</title><p>Citizens and patients have been using the Internet for health information seeking for almost two decades. Today, they increasingly consult LLMs in everyday situations: for answers to specific medical questions or for explanations of complex medical texts. This study investigates whether and how LLMs improve the readability of German online medical texts. To the authors&#x2019; knowledge, this is the first evaluation of readability metrics for German LLM-rephrased text and original medical text.</p><p>Shifting from the perspective of citizens and patients to health professionals or institutions: The use of an LLM could be a time-saving and cost-effective tool to fine-tune their information leaflets, online texts, etc to meet different information needs. The study showed that LLMs are already able to moderately improve readability.</p></sec><sec id="s4-5"><title>Conclusions</title><p>The use of LLMs can improve the readability of PEMs in the German language but requires careful expert review to ensure accuracy and completeness of medical information. The improvement is rather moderate, averaging 2&#x2010;3 school grades (for the WSTF). Still, this improvement can support patients reading PEMs online.</p><p>The selection of the LLM seemed critical to achieve good results, whereas the prompt seemed to be less of an influencing factor.</p><p>Some rephrased texts conveyed incorrect messages or took statements out of context. This is a serious risk, especially for medical texts. 
Therefore, a manual check is still needed and advised when using LLMs in similar scenarios.</p></sec></sec></body><back><notes><sec><title>Data Availability</title><p>The data of this study are available upon reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: MP</p><p>Data curation: AM</p><p>Formal analysis: AM</p><p>Investigation: AM</p><p>Methodology: AM, RZ, MW</p><p>Supervision: MP</p><p>Validation: RZ</p><p>Visualization: MP</p><p>Writing &#x2013; original draft: MP</p><p>Writing &#x2013; review &#x0026; editing: AM, RZ, MW</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">API</term><def><p>application programming interface</p></def></def-item><def-item><term id="abb3">FRE</term><def><p>Flesch reading ease</p></def></def-item><def-item><term id="abb4">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb5">PEM</term><def><p>patient education material</p></def></def-item><def-item><term id="abb6">WSTF</term><def><p>Wiener Sachtextformel (4th Vienna Formula)</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jacobs</surname><given-names>W</given-names> </name><name name-style="western"><surname>Amuta</surname><given-names>AO</given-names> </name><name name-style="western"><surname>Jeon</surname><given-names>KC</given-names> </name></person-group><article-title>Health information seeking in the digital age: an analysis of health information seeking behavior among US adults</article-title><source>Cogent Soc Sci</source><year>2017</year><month>01</month><day>1</day><volume>3</volume><issue>1</issue><fpage>1302785</fpage><pub-id 
pub-id-type="doi">10.1080/23311886.2017.1302785</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alpay</surname><given-names>L</given-names> </name><name name-style="western"><surname>Verhoef</surname><given-names>J</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>B</given-names> </name><name name-style="western"><surname>Te&#x2019;eni</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zwetsloot-Schonk</surname><given-names>JHM</given-names> </name></person-group><article-title>Current challenge in consumer health informatics: bridging the gap between access to information and information understanding</article-title><source>Biomed Inform Insights</source><year>2009</year><month>01</month><day>1</day><volume>2</volume><issue>1</issue><fpage>1</fpage><lpage>10</lpage><pub-id pub-id-type="doi">10.4137/bii.s2223</pub-id><pub-id pub-id-type="medline">20419038</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zowalla</surname><given-names>R</given-names> </name><name name-style="western"><surname>Pobiruchin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wiesner</surname><given-names>M</given-names> </name></person-group><article-title>Analyzing the readability of health information booklets on cardiovascular diseases</article-title><source>Stud Health Technol Inform</source><year>2018</year><volume>253</volume><fpage>16</fpage><lpage>20</lpage><pub-id pub-id-type="medline">30147031</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Basch</surname><given-names>CH</given-names> </name><name 
name-style="western"><surname>Fera</surname><given-names>J</given-names> </name><name name-style="western"><surname>Garcia</surname><given-names>P</given-names> </name></person-group><article-title>Readability of influenza information online: implications for consumer health</article-title><source>Am J Infect Control</source><year>2019</year><month>11</month><volume>47</volume><issue>11</issue><fpage>1298</fpage><lpage>1301</lpage><pub-id pub-id-type="doi">10.1016/j.ajic.2019.04.178</pub-id><pub-id pub-id-type="medline">31253552</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yeung</surname><given-names>AWK</given-names> </name><name name-style="western"><surname>Wochele-Thoma</surname><given-names>T</given-names> </name><name name-style="western"><surname>Eibensteiner</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Official websites providing information on COVID-19 vaccination: readability and content analysis</article-title><source>JMIR Public Health Surveill</source><year>2022</year><month>03</month><day>15</day><volume>8</volume><issue>3</issue><fpage>e34003</fpage><pub-id pub-id-type="doi">10.2196/34003</pub-id><pub-id pub-id-type="medline">35073276</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Silva</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Santos</surname><given-names>P</given-names> </name></person-group><article-title>The impact of health literacy on knowledge and attitudes towards preventive strategies against COVID-19: a cross-sectional study</article-title><source>Int J Environ Res Public Health</source><year>2021</year><month>05</month><day>19</day><volume>18</volume><issue>10</issue><fpage>5421</fpage><pub-id 
pub-id-type="doi">10.3390/ijerph18105421</pub-id><pub-id pub-id-type="medline">34069438</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McCaffery</surname><given-names>KJ</given-names> </name><name name-style="western"><surname>Dodd</surname><given-names>RH</given-names> </name><name name-style="western"><surname>Cvejic</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Health literacy and disparities in COVID-19-related knowledge, attitudes, beliefs and behaviours in Australia</article-title><source>Public Health Res Pract</source><year>2020</year><month>12</month><day>9</day><volume>30</volume><issue>4</issue><fpage>30342012</fpage><pub-id pub-id-type="doi">10.17061/phrp30342012</pub-id><pub-id pub-id-type="medline">33294907</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zowalla</surname><given-names>R</given-names> </name><name name-style="western"><surname>Pfeifer</surname><given-names>D</given-names> </name><name name-style="western"><surname>Wetter</surname><given-names>T</given-names> </name></person-group><article-title>Readability and topics of the German health web: exploratory study and text analysis</article-title><source>PLOS ONE</source><year>2023</year><volume>18</volume><issue>2</issue><fpage>e0281582</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0281582</pub-id><pub-id pub-id-type="medline">36763573</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rooney</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Santiago</surname><given-names>G</given-names> </name><name 
name-style="western"><surname>Perni</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Readability of patient education materials from high-impact medical journals: a 20-year analysis</article-title><source>J Patient Exp</source><year>2021</year><volume>8</volume><fpage>2374373521998847</fpage><pub-id pub-id-type="doi">10.1177/2374373521998847</pub-id><pub-id pub-id-type="medline">34179407</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gordejeva</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zowalla</surname><given-names>R</given-names> </name><name name-style="western"><surname>Pobiruchin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wiesner</surname><given-names>M</given-names> </name></person-group><article-title>Readability of English, German, and Russian disease-related Wikipedia pages: automated computational analysis</article-title><source>J Med Internet Res</source><year>2022</year><month>05</month><day>16</day><volume>24</volume><issue>5</issue><fpage>e36835</fpage><pub-id pub-id-type="doi">10.2196/36835</pub-id><pub-id pub-id-type="medline">35576562</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Neves</surname><given-names>MP</given-names> </name><name name-style="western"><surname>De Almeida</surname><given-names>AB</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Sousa Antunes</surname><given-names>H</given-names> </name><name name-style="western"><surname>Freitas</surname><given-names>PM</given-names> </name><name name-style="western"><surname>Oliveira</surname><given-names>AL</given-names> </name><name 
name-style="western"><surname>Martins Pereira</surname><given-names>C</given-names> </name><name name-style="western"><surname>Vaz de Sequeira</surname><given-names>E</given-names> </name></person-group><article-title>Before and beyond artificial intelligence: opportunities and challenges</article-title><source>Multidisciplinary Perspectives on Artificial Intelligence and the Law</source><year>2024</year><publisher-name>Springer International Publishing</publisher-name><fpage>107</fpage><lpage>125</lpage><pub-id pub-id-type="doi">10.1007/978-3-031-41264-6_6</pub-id><pub-id pub-id-type="other">978-3-031-41263-9</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Singhal</surname><given-names>K</given-names> </name><name name-style="western"><surname>Azizi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tu</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Large language models encode clinical knowledge</article-title><source>Nature</source><year>2023</year><month>08</month><day>3</day><volume>620</volume><fpage>172</fpage><lpage>180</lpage><pub-id pub-id-type="doi">10.1038/s41586-023-06291-2</pub-id><pub-id pub-id-type="medline">37438534</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSW</given-names> </name><name 
name-style="western"><surname>Liu</surname><given-names>N</given-names> </name></person-group><article-title>Large language models in health care: development, applications, and challenges</article-title><source>Health Care Sci</source><year>2023</year><month>08</month><volume>2</volume><issue>4</issue><fpage>255</fpage><lpage>263</lpage><pub-id pub-id-type="doi">10.1002/hcs2.61</pub-id><pub-id pub-id-type="medline">38939520</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lois</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yates</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ivy</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Accuracy of natural language processors for patients seeking inguinal hernia information</article-title><source>Surg Endosc</source><year>2024</year><month>12</month><volume>38</volume><issue>12</issue><fpage>7409</fpage><lpage>7415</lpage><pub-id pub-id-type="doi">10.1007/s00464-024-11221-y</pub-id><pub-id pub-id-type="medline">39443381</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Denecke</surname><given-names>K</given-names> </name><name name-style="western"><surname>May</surname><given-names>R</given-names> </name><name name-style="western"><surname>Rivera Romero</surname><given-names>O</given-names> </name><collab>LLMHealthGroup</collab></person-group><article-title>Potential of large language models in health care: Delphi study</article-title><source>J Med Internet Res</source><year>2024</year><month>05</month><day>13</day><volume>26</volume><fpage>e52399</fpage><pub-id pub-id-type="doi">10.2196/52399</pub-id><pub-id pub-id-type="medline">38739445</pub-id></nlm-citation></ref><ref 
id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Spotnitz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Idnay</surname><given-names>B</given-names> </name><name name-style="western"><surname>Gordon</surname><given-names>ER</given-names> </name><etal/></person-group><article-title>A survey of clinicians&#x2019; views of the utility of large language models</article-title><source>Appl Clin Inform</source><year>2024</year><month>03</month><volume>15</volume><issue>2</issue><fpage>306</fpage><lpage>312</lpage><pub-id pub-id-type="doi">10.1055/a-2281-7092</pub-id><pub-id pub-id-type="medline">38442909</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tepe</surname><given-names>M</given-names> </name><name name-style="western"><surname>Emekli</surname><given-names>E</given-names> </name></person-group><article-title>Decoding medical jargon: the use of AI language models (ChatGPT-4, BARD, Microsoft Copilot) in radiology reports</article-title><source>Patient Educ Couns</source><year>2024</year><month>09</month><volume>126</volume><fpage>108307</fpage><pub-id pub-id-type="doi">10.1016/j.pec.2024.108307</pub-id><pub-id pub-id-type="medline">38743965</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heilmeyer</surname><given-names>F</given-names> </name><name name-style="western"><surname>B&#x00F6;hringer</surname><given-names>D</given-names> </name><name name-style="western"><surname>Reinhard</surname><given-names>T</given-names> </name><name name-style="western"><surname>Arens</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Lyssenko</surname><given-names>L</given-names> </name><name name-style="western"><surname>Haverkamp</surname><given-names>C</given-names> </name></person-group><article-title>Viability of open large language models for clinical documentation in German health care: real-world model evaluation study</article-title><source>JMIR Med Inform</source><year>2024</year><month>08</month><day>28</day><volume>12</volume><fpage>e59617</fpage><pub-id pub-id-type="doi">10.2196/59617</pub-id><pub-id pub-id-type="medline">39195570</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chow</surname><given-names>JCL</given-names> </name><name name-style="western"><surname>Sanders</surname><given-names>L</given-names> </name><name name-style="western"><surname>Li</surname><given-names>K</given-names> </name></person-group><article-title>Impact of ChatGPT on medical chatbots as a disruptive technology</article-title><source>Front Artif Intell</source><year>2023</year><volume>6</volume><fpage>1166014</fpage><pub-id pub-id-type="doi">10.3389/frai.2023.1166014</pub-id><pub-id pub-id-type="medline">37091303</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Swisher</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>AW</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>GC</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Carle</surname><given-names>TR</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>DM</given-names> </name></person-group><article-title>Enhancing health literacy: evaluating the readability 
of patient handouts revised by ChatGPT&#x2019;s large language model</article-title><source>Otolaryngol Head Neck Surg</source><year>2024</year><month>12</month><volume>171</volume><issue>6</issue><fpage>1751</fpage><lpage>1757</lpage><pub-id pub-id-type="doi">10.1002/ohn.927</pub-id><pub-id pub-id-type="medline">39105460</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Behers</surname><given-names>BJ</given-names> </name><name name-style="western"><surname>Vargas</surname><given-names>IA</given-names> </name><name name-style="western"><surname>Behers</surname><given-names>BM</given-names> </name><etal/></person-group><article-title>Assessing the readability of patient education materials on cardiac catheterization from artificial intelligence chatbots: an observational cross-sectional study</article-title><source>Cureus</source><year>2024</year><month>07</month><volume>16</volume><issue>7</issue><fpage>e63865</fpage><pub-id pub-id-type="doi">10.7759/cureus.63865</pub-id><pub-id pub-id-type="medline">39099896</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pompili</surname><given-names>D</given-names> </name><name name-style="western"><surname>Richa</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Collins</surname><given-names>P</given-names> </name><name name-style="western"><surname>Richards</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hennessey</surname><given-names>DB</given-names> </name></person-group><article-title>Using artificial intelligence to generate medical literature for urology patients: a comparison of three different large language models</article-title><source>World J 
Urol</source><year>2024</year><month>07</month><day>29</day><volume>42</volume><issue>1</issue><fpage>455</fpage><pub-id pub-id-type="doi">10.1007/s00345-024-05146-3</pub-id><pub-id pub-id-type="medline">39073590</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Burns</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bakaj</surname><given-names>A</given-names> </name><name name-style="western"><surname>Berishaj</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hristidis</surname><given-names>V</given-names> </name><name name-style="western"><surname>Deak</surname><given-names>P</given-names> </name><name name-style="western"><surname>Equils</surname><given-names>O</given-names> </name></person-group><article-title>Use of generative AI for improving health literacy in reproductive health: case study</article-title><source>JMIR Form Res</source><year>2024</year><month>08</month><day>6</day><volume>8</volume><fpage>e59434</fpage><pub-id pub-id-type="doi">10.2196/59434</pub-id><pub-id pub-id-type="medline">38986153</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roster</surname><given-names>K</given-names> </name><name name-style="western"><surname>Kann</surname><given-names>RB</given-names> </name><name name-style="western"><surname>Farabi</surname><given-names>B</given-names> </name><name name-style="western"><surname>Gronbeck</surname><given-names>C</given-names> </name><name name-style="western"><surname>Brownstone</surname><given-names>N</given-names> </name><name name-style="western"><surname>Lipner</surname><given-names>SR</given-names> </name></person-group><article-title>Readability and health literacy scores for ChatGPT-generated dermatology 
public education materials: cross-sectional analysis of sunscreen and melanoma questions</article-title><source>JMIR Dermatol</source><year>2024</year><month>03</month><day>6</day><volume>7</volume><fpage>e50163</fpage><pub-id pub-id-type="doi">10.2196/50163</pub-id><pub-id pub-id-type="medline">38446502</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rouhi</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Ghanem</surname><given-names>YK</given-names> </name><name name-style="western"><surname>Yolchieva</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Can artificial intelligence improve the readability of patient education materials on aortic stenosis? A pilot study</article-title><source>Cardiol Ther</source><year>2024</year><month>03</month><volume>13</volume><issue>1</issue><fpage>137</fpage><lpage>147</lpage><pub-id pub-id-type="doi">10.1007/s40119-023-00347-0</pub-id><pub-id pub-id-type="medline">38194058</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shah</surname><given-names>YB</given-names> </name><name name-style="western"><surname>Ghosh</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hochberg</surname><given-names>AR</given-names> </name><etal/></person-group><article-title>Comparison of ChatGPT and traditional patient education materials for men&#x2019;s health</article-title><source>Urol Pract</source><year>2024</year><month>01</month><volume>11</volume><issue>1</issue><fpage>87</fpage><lpage>94</lpage><pub-id pub-id-type="doi">10.1097/UPJ.0000000000000490</pub-id><pub-id pub-id-type="medline">37914380</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Halawani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Almehmadi</surname><given-names>SG</given-names> </name><name name-style="western"><surname>Alhubaishy</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Alnefaie</surname><given-names>ZA</given-names> </name><name name-style="western"><surname>Hasan</surname><given-names>MN</given-names> </name></person-group><article-title>Empowering patients: how accurate and readable are large language models in renal cancer education</article-title><source>Front Oncol</source><year>2024</year><volume>14</volume><fpage>1457516</fpage><pub-id pub-id-type="doi">10.3389/fonc.2024.1457516</pub-id><pub-id pub-id-type="medline">39391252</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guerra</surname><given-names>GA</given-names> </name><name name-style="western"><surname>Grove</surname><given-names>S</given-names> </name><name name-style="western"><surname>Le</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Artificial intelligence as a modality to enhance the readability of neurosurgical literature for patients</article-title><source>J Neurosurg</source><year>2025</year><month>04</month><day>1</day><volume>142</volume><issue>4</issue><fpage>1189</fpage><lpage>1195</lpage><pub-id pub-id-type="doi">10.3171/2024.6.JNS24617</pub-id><pub-id pub-id-type="medline">39504543</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eng</surname><given-names>E</given-names> </name><name name-style="western"><surname>Mowers</surname><given-names>C</given-names> </name><name 
name-style="western"><surname>Sachdev</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Chat generative pre-trained transformer (ChatGPT) &#x2013; 3.5 responses require advanced readability for the general population and may not effectively supplement patient-related information provided by the treating surgeon regarding common questions about rotator cuff repair</article-title><source>Arthroscopy</source><year>2025</year><month>01</month><volume>41</volume><issue>1</issue><fpage>42</fpage><lpage>52</lpage><pub-id pub-id-type="doi">10.1016/j.arthro.2024.05.009</pub-id><pub-id pub-id-type="medline">38777000</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Charnock</surname><given-names>D</given-names> </name><name name-style="western"><surname>Shepperd</surname><given-names>S</given-names> </name><name name-style="western"><surname>Needham</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gann</surname><given-names>R</given-names> </name></person-group><article-title>DISCERN: an instrument for judging the quality of written consumer health information on treatment choices</article-title><source>J Epidemiol Community Health</source><year>1999</year><month>02</month><volume>53</volume><issue>2</issue><fpage>105</fpage><lpage>111</lpage><pub-id pub-id-type="doi">10.1136/jech.53.2.105</pub-id><pub-id pub-id-type="medline">10396471</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mika</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Engstrom</surname><given-names>SM</given-names> </name><name 
name-style="western"><surname>Polkowski</surname><given-names>GG</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>JM</given-names> </name></person-group><article-title>Assessing ChatGPT responses to common patient questions regarding total hip arthroplasty</article-title><source>J Bone Joint Surg Am</source><year>2023</year><month>10</month><day>4</day><volume>105</volume><issue>19</issue><fpage>1519</fpage><lpage>1526</lpage><pub-id pub-id-type="doi">10.2106/JBJS.23.00209</pub-id><pub-id pub-id-type="medline">37459402</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>H</given-names> </name><name name-style="western"><surname>Moon</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Iyer</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Decoding radiology reports: potential application of OpenAI ChatGPT to enhance patient understanding of diagnostic reports</article-title><source>Clin Imaging</source><year>2023</year><month>09</month><volume>101</volume><fpage>137</fpage><lpage>141</lpage><pub-id pub-id-type="doi">10.1016/j.clinimag.2023.06.008</pub-id><pub-id pub-id-type="medline">37336169</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moons</surname><given-names>P</given-names> </name><name name-style="western"><surname>Van Bulck</surname><given-names>L</given-names> </name></person-group><article-title>Using ChatGPT and Google Bard to improve the readability of written patient information: a proof of concept</article-title><source>Eur J Cardiovasc Nurs</source><year>2024</year><month>03</month><day>12</day><volume>23</volume><issue>2</issue><fpage>122</fpage><lpage>126</lpage><pub-id 
pub-id-type="doi">10.1093/eurjcn/zvad087</pub-id><pub-id pub-id-type="medline">37603843</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Srinivasan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Rajeev</surname><given-names>ND</given-names> </name><name name-style="western"><surname>Kanu</surname><given-names>MU</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Samakar</surname><given-names>K</given-names> </name></person-group><article-title>Large language models and bariatric surgery patient education: a comparative readability analysis of GPT-3.5, GPT-4, Bard, and online institutional resources</article-title><source>Surg Endosc</source><year>2024</year><month>05</month><volume>38</volume><issue>5</issue><fpage>2522</fpage><lpage>2532</lpage><pub-id pub-id-type="doi">10.1007/s00464-024-10720-2</pub-id><pub-id pub-id-type="medline">38472531</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bhattacharya</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Chakraborty</surname><given-names>C</given-names> </name></person-group><article-title>A domain-specific next-generation large language model (LLM) or ChatGPT is required for biomedical engineering and research</article-title><source>Ann Biomed 
Eng</source><year>2024</year><month>03</month><volume>52</volume><issue>3</issue><fpage>451</fpage><lpage>454</lpage><pub-id pub-id-type="doi">10.1007/s10439-023-03306-x</pub-id><pub-id pub-id-type="medline">37428337</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alonso</surname><given-names>I</given-names> </name><name name-style="western"><surname>Oronoz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Agerri</surname><given-names>R</given-names> </name></person-group><article-title>MedExpQA: multilingual benchmarking of large language models for medical question answering</article-title><source>Artif Intell Med</source><year>2024</year><month>09</month><volume>155</volume><fpage>102938</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2024.102938</pub-id><pub-id pub-id-type="medline">39121544</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Weiss</surname><given-names>BD</given-names> </name></person-group><source>Health Literacy - A Manual for Clinicians</source><year>2003</year><access-date>2024-11-08</access-date><publisher-name>American Medical Association Foundation and American Medical Association</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="http://lib.ncfh.org/pdfs/6617.pdf">http://lib.ncfh.org/pdfs/6617.pdf</ext-link></comment><pub-id pub-id-type="other">1-57947-502-7</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="web"><article-title>How to write easy-to-read health materials</article-title><source>US National Library of Medicine</source><year>2017</year><access-date>2024-11-08</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://webcitation.org/6zBeCFhPU">https://webcitation.org/6zBeCFhPU</ext-link></comment></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Flesch</surname><given-names>R</given-names> </name></person-group><article-title>A new readability yardstick</article-title><source>J Appl Psychol</source><year>1948</year><month>06</month><volume>32</volume><issue>3</issue><fpage>221</fpage><lpage>233</lpage><pub-id pub-id-type="doi">10.1037/h0057532</pub-id><pub-id pub-id-type="medline">18867058</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Amstad</surname><given-names>T</given-names> </name></person-group><source>Wie Verst&#x00E4;ndlich Sind Unsere Zeitungen</source><year>1978</year><access-date>2025-12-20</access-date><publisher-name>Studenten-Schreib-Service</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://books.google.co.in/books/about/Wie_verst%C3%A4ndlich_sind_unsere_Zeitungen.html?id=kiI7vwEACAAJ&#x0026;redir_esc=y">https://books.google.co.in/books/about/Wie_verst%C3%A4ndlich_sind_unsere_Zeitungen.html?id=kiI7vwEACAAJ&#x0026;redir_esc=y</ext-link></comment></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Bamberger</surname><given-names>R</given-names> </name><name name-style="western"><surname>Vanecek</surname><given-names>E</given-names> </name></person-group><source>Lesen - Verstehen - Lernen - Schreiben: Die Schwierigkeitsstufen von Texten in Deutscher Sprache [Book in German]</source><year>1984</year><access-date>2025-12-20</access-date><publisher-name>Jugend und Volk</publisher-name><comment><ext-link ext-link-type="uri" 
xlink:href="https://search.worldcat.org/fr/title/lesen-verstehen-lernen-schreiben-die-schwierigkeitsstufen-von-texten-in-deutscher-sprache/oclc/12137245">https://search.worldcat.org/fr/title/lesen-verstehen-lernen-schreiben-die-schwierigkeitsstufen-von-texten-in-deutscher-sprache/oclc/12137245</ext-link></comment><pub-id pub-id-type="other">9783224152502</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wiesner</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zowalla</surname><given-names>R</given-names> </name><name name-style="western"><surname>Pobiruchin</surname><given-names>M</given-names> </name></person-group><article-title>The difficulty of German information booklets on psoriasis and psoriatic arthritis: automated readability and vocabulary analysis</article-title><source>JMIR Dermatol</source><year>2020</year><volume>3</volume><issue>1</issue><fpage>e16095</fpage><pub-id pub-id-type="doi">10.2196/16095</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><article-title>About GitHub</article-title><source>GitHub</source><year>2024</year><access-date>2024-11-07</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/about">https://github.com/about</ext-link></comment></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="web"><source>Hugging Face</source><year>2024</year><access-date>2025-12-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://huggingface.co">https://huggingface.co</ext-link></comment></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="web"><article-title>GPT-4 is OpenAI&#x2019;s most advanced system, producing safer and more useful responses</article-title><source>OpenAI</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link 
ext-link-type="uri" xlink:href="https://openai.com/index/gpt-4/">https://openai.com/index/gpt-4/</ext-link></comment></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="web"><source>Gemini</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://gemini.google.com">https://gemini.google.com</ext-link></comment></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Burstein</surname><given-names>J</given-names> </name><name name-style="western"><surname>Doran</surname><given-names>C</given-names> </name><name name-style="western"><surname>Solorio</surname><given-names>T</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source><year>2019</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>4171</fpage><lpage>4186</lpage><comment><ext-link ext-link-type="uri" xlink:href="http://aclweb.org/anthology/N19-1">http://aclweb.org/anthology/N19-1</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="web"><article-title>Llama 2: open source, free for research and 
commercial use</article-title><source>Llama</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.llama.com/llama2">https://www.llama.com/llama2</ext-link></comment></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="web"><article-title>Claude 2</article-title><source>Anthropic</source><year>2023</year><month>07</month><day>11</day><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.anthropic.com/news/claude-2">https://www.anthropic.com/news/claude-2</ext-link></comment></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raffel</surname><given-names>C</given-names> </name><name name-style="western"><surname>Shazeer</surname><given-names>N</given-names> </name><name name-style="western"><surname>Roberts</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Exploring the limits of transfer learning with a unified text-to-text transformer</article-title><source>J Mach Learn Res</source><year>2020</year><month>01</month><access-date>2025-12-20</access-date><volume>21</volume><issue>1</issue><fpage>5485</fpage><lpage>5551</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/10.5555/3455716.3455856">https://dl.acm.org/doi/10.5555/3455716.3455856</ext-link></comment></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="web"><article-title>BigScience large open-science open-access multilingual language model</article-title><source>Hugging Face</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://huggingface.co/bigscience/bloom">https://huggingface.co/bigscience/bloom</ext-link></comment></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation 
citation-type="web"><article-title>Copilot</article-title><source>Microsoft</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.microsoft.com/en-US/microsoft-copilot/personal-ai-assistant">https://www.microsoft.com/en-US/microsoft-copilot/personal-ai-assistant</ext-link></comment></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="web"><source>Falcon LLM</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://falconllm.tii.ae/">https://falconllm.tii.ae/</ext-link></comment></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="web"><source>Mistral AI</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://mistral.ai/">https://mistral.ai/</ext-link></comment></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Uhlig</surname><given-names>M</given-names> </name></person-group><article-title>DRXD1000/Phoenix-7B</article-title><source>Hugging Face</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://huggingface.co/DRXD1000/Phoenix">https://huggingface.co/DRXD1000/Phoenix</ext-link></comment></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Pl&#x00FC;ster</surname><given-names>B</given-names> </name><name name-style="western"><surname>Schuhmann</surname><given-names>C</given-names> </name></person-group><article-title>LeoLM/leo-hessianai-13b</article-title><source>Hugging Face</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://huggingface.co/LeoLM/leo-hessianai-13b">https://huggingface.co/LeoLM/leo-hessianai-13b</ext-link></comment></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Han</surname><given-names>T</given-names> </name><name name-style="western"><surname>Adams</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Papaioannou</surname><given-names>JM</given-names> </name><etal/></person-group><article-title>MedAlpaca&#x2013;an open-source collection of medical conversational AI models and training data</article-title><source>ArXiv</source><comment>Preprint posted online on Apr 14, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2304.08247</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="web"><article-title>stanford-crfm/BioMedLM</article-title><source>Hugging Face</source><year>2024</year><access-date>2024-10-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://huggingface.co/stanford-crfm/BioMedLM">https://huggingface.co/stanford-crfm/BioMedLM</ext-link></comment></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Petroni</surname><given-names>F</given-names> </name><name name-style="western"><surname>Rockt&#x00E4;schel</surname><given-names>T</given-names> </name><name name-style="western"><surname>Riedel</surname><given-names>S</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Inui</surname><given-names>K</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>V</given-names> </name><name 
name-style="western"><surname>Wan</surname><given-names>X</given-names> </name></person-group><article-title>Language models as knowledge bases?</article-title><source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source><year>2019</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>2463</fpage><lpage>2473</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.aclweb.org/anthology/D19-1">https://www.aclweb.org/anthology/D19-1</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/D19-1250</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wei</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Schuurmans</surname><given-names>D</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Koyejo</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mohamed</surname><given-names>S</given-names> </name><name name-style="western"><surname>Agarwal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Belgrave</surname><given-names>D</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>K</given-names> </name><name name-style="western"><surname>Oh</surname><given-names>A</given-names> </name></person-group><article-title>Chain-of-thought prompting elicits reasoning in large language models</article-title><access-date>2025-12-20</access-date><conf-name>NIPS&#x2019;22: Proceedings of the 36th International Conference on Neural Information Processing Systems</conf-name><conf-date>Nov 28 to Dec 9, 
2022</conf-date><conf-loc>New Orleans, LA</conf-loc><fpage>24824</fpage><lpage>24837</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/10.5555/3600270.3602070">https://dl.acm.org/doi/10.5555/3600270.3602070</ext-link></comment></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Bouamor</surname><given-names>H</given-names> </name><name name-style="western"><surname>Pino</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bali</surname><given-names>K</given-names> </name></person-group><article-title>Text classification via large language models</article-title><source>Findings of the Association for Computational Linguistics: EMNLP 2023</source><year>2023</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>8990</fpage><lpage>9005</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2023.findings-emnlp">https://aclanthology.org/2023.findings-emnlp</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2023.findings-emnlp.603</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holm</surname><given-names>S</given-names> </name></person-group><article-title>A simple sequentially rejective multiple test procedure</article-title><source>Scand J Stat</source><year>1979</year><volume>6</volume><issue>2</issue><fpage>65</fpage><lpage>70</lpage><pub-id 
pub-id-type="doi">10.2307/4615733</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Faul</surname><given-names>F</given-names> </name><name name-style="western"><surname>Erdfelder</surname><given-names>E</given-names> </name><name name-style="western"><surname>Buchner</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lang</surname><given-names>AG</given-names> </name></person-group><article-title>Statistical power analyses using G*Power 3.1: tests for correlation and regression analyses</article-title><source>Behav Res Methods</source><year>2009</year><month>11</month><volume>41</volume><issue>4</issue><fpage>1149</fpage><lpage>1160</lpage><pub-id pub-id-type="doi">10.3758/BRM.41.4.1149</pub-id><pub-id pub-id-type="medline">19897823</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ji</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>N</given-names> </name><name name-style="western"><surname>Frieske</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Survey of hallucination in natural language generation</article-title><source>ACM Comput Surv</source><year>2023</year><month>12</month><day>31</day><volume>55</volume><issue>12</issue><fpage>1</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.1145/3571730</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lyu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zapadka</surname><given-names>ME</given-names> 
</name><etal/></person-group><article-title>Translating radiology reports into plain language using ChatGPT and GPT-4 with prompt learning: results, limitations, and potential</article-title><source>Vis Comput Ind Biomed Art</source><year>2023</year><month>05</month><day>18</day><volume>6</volume><issue>1</issue><fpage>9</fpage><pub-id pub-id-type="doi">10.1186/s42492-023-00136-5</pub-id><pub-id pub-id-type="medline">37198498</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ovelman</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kugley</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gartlehner</surname><given-names>G</given-names> </name><name name-style="western"><surname>Viswanathan</surname><given-names>M</given-names> </name></person-group><article-title>The use of a large language model to create plain language summaries of evidence reviews in healthcare: a feasibility study</article-title><source>Cochrane Evid Synth Methods</source><year>2024</year><month>02</month><volume>2</volume><issue>2</issue><fpage>e12041</fpage><pub-id pub-id-type="doi">10.1002/cesm.12041</pub-id><pub-id pub-id-type="medline">40475808</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>List of content providers and websites.</p><media xlink:href="ai_v5i1e77149_app1.pdf" xlink:title="PDF File, 61 KB"/></supplementary-material></app-group></back></article>