<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR AI</journal-id>
      <journal-title>JMIR AI</journal-title>
      <issn pub-type="epub">2817-1705</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v5i1e91369</article-id>
      <article-id pub-id-type="pmid">42081273</article-id>
      <article-id pub-id-type="doi">10.2196/91369</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Expert Evaluation of the Perceived Accuracy, Relevance, and Safety of Large Language Model–Generated Patient Information in Geriatrics: Cross-Condition Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Coristine</surname>
            <given-names>Andrew</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Tsai</surname>
            <given-names>Meng-Hsun</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Fukuzawa</surname>
            <given-names>Fumitoshi</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Martini</surname>
            <given-names>Sebastian</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Medicine IV, Geriatrics</institution>
            <institution>LMU Munich Hospital</institution>
            <institution>Ludwig-Maximilians-Universität München</institution>
            <addr-line>Ziemssenstr. 5</addr-line>
            <addr-line>Munich, Bavaria, 80336</addr-line>
            <country>Germany</country>
            <phone>49 089440055407</phone>
            <fax>49 08944001955407</fax>
            <email>sebastian.martini@med.uni-muenchen.de</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3663-9239</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Schluessel</surname>
            <given-names>Sabine</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2485-8859</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Aghamaliyev</surname>
            <given-names>Ughur</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0970-2132</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Rippl</surname>
            <given-names>Michaela</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-1583-1577</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Deissler</surname>
            <given-names>Linda</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-6108-6441</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Tausendfreund</surname>
            <given-names>Olivia</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-1012-9912</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Nuebler</surname>
            <given-names>Desiree</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-0229-8838</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Mueller</surname>
            <given-names>Katharina</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-3128-1465</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Schmidmaier</surname>
            <given-names>Ralf</given-names>
          </name>
          <degrees>MME, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3541-3588</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Drey</surname>
            <given-names>Michael</given-names>
          </name>
          <degrees>MSc, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6709-9672</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Medicine IV, Geriatrics</institution>
        <institution>LMU Munich Hospital</institution>
        <institution>Ludwig-Maximilians-Universität München</institution>
        <addr-line>Munich, Bavaria</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of General, Visceral, and Transplant Surgery</institution>
        <institution>LMU Munich Hospital</institution>
        <institution>Ludwig-Maximilians-Universität München</institution>
        <addr-line>Munich, Bavaria</addr-line>
        <country>Germany</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Sebastian Martini <email>sebastian.martini@med.uni-muenchen.de</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>4</day>
        <month>5</month>
        <year>2026</year>
      </pub-date>
      <volume>5</volume>
      <elocation-id>e91369</elocation-id>
      <history>
        <date date-type="received">
          <day>14</day>
          <month>1</month>
          <year>2026</year>
        </date>
        <date date-type="rev-request">
          <day>26</day>
          <month>1</month>
          <year>2026</year>
        </date>
        <date date-type="accepted">
          <day>31</day>
          <month>3</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Sebastian Martini, Sabine Schluessel, Ughur Aghamaliyev, Michaela Rippl, Linda Deissler, Olivia Tausendfreund, Desiree Nuebler, Katharina Mueller, Ralf Schmidmaier, Michael Drey. Originally published in JMIR AI (https://ai.jmir.org), 04.05.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on https://www.ai.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://ai.jmir.org/2026/1/e91369" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Large language models (LLMs) are increasingly used to generate patient-oriented medical information. In geriatrics, such information must balance accuracy, relevance, and safety, as older adults may be particularly susceptible to misleading or harmful advice. However, systematic evaluations of expert perceptions across multiple geriatric conditions remain limited.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to explore geriatricians’ perceptions of the accuracy, relevance, and potential harm of LLM-generated patient information across common geriatric conditions and to examine variability and interrater agreement in expert ratings.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>In this cross-sectional expert rating study, 10 geriatricians evaluated 50 LLM-generated statements covering 5 geriatric conditions (sarcopenia, osteoporosis, urinary incontinence, depression, and dementia). Statements addressed diagnostic, etiological, prognostic, risk-related, and therapeutic aspects. Experts rated perceived accuracy, relevance, and potential harm using 5-point Likert scales. Rating distributions were summarized using medians and IQRs. The Kendall coefficient of concordance (W) was used exploratorily to assess agreement in the relative ordering of statements within predefined strata. Readability was assessed using Flesch-Kincaid Grade Level and Flesch Reading Ease.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Expert ratings indicated high perceived accuracy (median 4.32, IQR 4.01-4.59) and perceived relevance (median 4.51, IQR 4.06-4.66), while perceived potential harm remained low (median 1.59, IQR 1.17-1.92). IQR values ranged from 0.00 to 1.38 with most values clustering below 0.5, indicating limited dispersion in expert ratings. Agreement in the relative ordering of statements varied across domains, with W values ranging from 0.27 to 0.62 (median 0.53, IQR 0.46-0.58), indicating moderate concordance. No statements combined low perceived accuracy with high perceived potential harm. Readability analysis indicated generally accessible language, with a median Flesch-Kincaid Grade Level of 8.3 (IQR 7.4-9.6) and a median Flesch Reading Ease score of 60.8 (IQR 50.1-66.9).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>LLM-generated patient information for common geriatric conditions was rated as largely accurate and relevant, with low potential harm in typical scenarios. Variability in expert emphasis and the exploratory nature of agreement analyses highlight the limitations of perception-based evaluation. Future studies should incorporate guideline-based validation, readability optimization, and patient-centered outcomes to more comprehensively evaluate the safety and suitability of LLM-generated information for geriatric patient education.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>large language models</kwd>
        <kwd>LLMs</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>geriatric medicine</kwd>
        <kwd>patient education</kwd>
        <kwd>medical informatics</kwd>
        <kwd>expert consensus</kwd>
        <kwd>artificial intelligence in health care</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Geriatric patients represent a rapidly growing population affected by prevalent conditions such as sarcopenia, osteoporosis, urinary incontinence, depression, and dementia [<xref ref-type="bibr" rid="ref1">1</xref>]. Patients and caregivers frequently seek health information online to better understand diagnostic procedures, disease mechanisms, therapeutic options, associated risks, and prognosis [<xref ref-type="bibr" rid="ref2">2</xref>]. However, available information is often fragmented, inconsistent, or insufficiently tailored to varying levels of medical knowledge [<xref ref-type="bibr" rid="ref3">3</xref>]. As a result, the complexity and framing of medical content may not align with the informational needs of older adults, particularly in the context of multimorbidity.</p>
      <p>In this setting, large language models (LLMs) such as ChatGPT (OpenAI) are increasingly used to generate health-related information for patients and caregivers [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Unlike static online resources, these systems synthesize responses to user queries and may adapt explanations to the context of the question [<xref ref-type="bibr" rid="ref6">6</xref>]. Their ability to produce fluent and coherent text has raised interest in their potential role in patient education and decision support. At the same time, concerns persist regarding the accuracy, relevance, and safety of LLM-generated medical content, particularly when responses appear plausible but contain subtle inaccuracies or omit clinically important caveats [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>These concerns are especially relevant in geriatrics. Older adults often manage multiple chronic conditions, polypharmacy, and functional impairments, increasing susceptibility to misleading or incomplete medical advice. Even minor inaccuracies or ambiguous recommendations may result in delayed care seeking, inappropriate self-management, or medication-related harm [<xref ref-type="bibr" rid="ref8">8</xref>]. Consequently, evaluating LLM-generated patient information in geriatric contexts requires careful consideration not only of perceived accuracy but also of perceived relevance and potential harm.</p>
      <p>Previous studies assessing LLM performance in medicine have predominantly focused on factual accuracy using benchmark questions, examinations, or guideline-based comparisons [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. While valuable, these approaches may not fully capture how clinicians perceive the usability and safety of patient-oriented information in real-world contexts. Expert rating studies provide a complementary perspective by capturing clinician judgments of perceived accuracy, relevance, and risk, particularly where formal gold standards are difficult to operationalize. Despite the high prevalence of multimorbidity in older adults, most evaluations of artificial intelligence (AI)–generated patient information have focused on single diseases rather than cross-condition assessment [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Likert-scale ratings summarize absolute judgments but provide limited insight into agreement among raters. Measures of dispersion describe variability in ratings, whereas concordance statistics capture consistency in relative ordering. These complementary approaches provide a more comprehensive understanding of expert evaluation.</p>
      <p>Against this background, this study provides an exploratory expert-based evaluation of LLM-generated patient information across 5 common geriatric conditions: sarcopenia, osteoporosis, urinary incontinence, depression, and dementia. Geriatricians rated responses addressing diagnostic, etiological, prognostic, risk-related, and therapeutic aspects with respect to perceived accuracy, perceived relevance, and perceived potential harm. The aim was not to establish objective clinical correctness or clinical prioritization but to examine patterns of perceived quality, variability, and agreement in expert judgments across conditions and content domains.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design and Workflow</title>
        <p>This study was designed as a cross-sectional expert evaluation of LLM-generated responses to common geriatric patient questions. The overall study workflow is summarized in <xref rid="figure1" ref-type="fig">Figure 1</xref>. Briefly, frequently asked geriatric questions were identified by a panel of geriatricians; submitted to ChatGPT using a standardized procedure; and subsequently evaluated by expert reviewers with respect to perceived accuracy, perceived relevance, and perceived potential harm. Readability of generated responses was assessed using established metrics (Flesch Reading Ease and Flesch-Kincaid Grade Level) to provide complementary information on accessibility of the content. In addition, demographic and professional background information of participating experts was collected. The study represents an exploratory assessment of expert perceptions and did not aim to establish objective clinical correctness or guideline adherence.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Study workflow and survey design. Geriatricians evaluated 50 large language model–generated responses to common geriatric patient questions across 5 geriatric conditions (sarcopenia, osteoporosis, urinary incontinence, depression, and dementia). For each condition, statements addressed diagnostics, etiology, prognosis, risks of therapy, and therapy. Responses were rated with respect to perceived accuracy, perceived relevance, and perceived potential harm. AI: artificial intelligence.</p>
          </caption>
          <graphic xlink:href="ai_v5i1e91369_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Identification and Selection of Geriatric Patient Questions</title>
        <p>A panel of 6 board-certified geriatricians identified frequently asked questions encountered in routine geriatric practice through structured group discussion. Questions covered 5 common geriatric disease domains: sarcopenia, osteoporosis, urinary incontinence, depression, and dementia. The aspects covered were diagnostics, etiology, prognosis, risks of therapy, and therapy.</p>
        <p>The identified questions were subsequently evaluated by each panel member using a predetermined point-based system that reflected their perceived frequency in routine clinical practice. For each question, geriatricians assigned a score of 1 (infrequently encountered), 2 (moderately frequent), or 3 (very frequently encountered). Scores were aggregated across all panel members, and questions with the highest total scores were selected for inclusion. The final sample size of 50 questions was predetermined to ensure representation across diseases. These 50 questions were retained for the subsequent generation and evaluation of LLM responses. This approach ensured that the evaluated questions reflected common information needs among geriatric patients and caregivers. Consequently, the resulting distribution across content domains was not uniform, reflecting the pragmatic selection based on clinical relevance and frequency.</p>
      </sec>
      <sec>
        <title>Generation of LLM Responses</title>
        <p>All selected questions were submitted to ChatGPT (version 5.1; OpenAI) between November 13 and 18, 2025. A paid subscription tier (ChatGPT Plus) was used. For each question, a new and independent chat session was initiated. As the standard web interface does not allow manual adjustment of generation parameters, default model settings (eg, temperature and sampling parameters) were applied. Questions were submitted using a standardized input procedure consisting solely of the respective patient question listed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. No additional instructions, role assignments, or system prompts were included in the input field.</p>
        <p>However, within the ChatGPT personalization settings, a background description was specified indicating that the user was aged &#62;70 years and had several health problems (“I am over 70 years old and have several health issues.”). This contextual setting was used to approximate a realistic interaction scenario in which older adults seek medical information from conversational AI systems.</p>
        <p>Each LLM-generated response consisted of a short paragraph addressing a patient question. For the purpose of expert evaluation, each generated answer was treated as one evaluable statement. No further segmentation of responses into substatements was performed. No follow-up questions or clarifications were provided. No postprocessing or manual editing of responses was performed prior to expert evaluation. The resulting responses constituted the material evaluated in this study (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Expert Panel and Rating Procedure</title>
        <p>A total of 10 geriatricians with clinical experience in the care of geriatric patients participated as expert reviewers. The geriatricians came from 4 different geriatric divisions across Germany. Their demographic and professional background data, years of clinical experience, and areas of specialization were collected using a standardized questionnaire. These characteristics are summarized in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>Each expert evaluated all LLM-generated responses independently with respect to 3 dimensions: perceived accuracy, perceived relevance, and perceived potential harm. Perceived accuracy was defined as the extent to which a statement appeared correct and clinically plausible based on the rater’s expertise. Relevance captured the perceived usefulness of the statement for patient education. Potential harm was defined as the likelihood of patient harm if the information were followed, including unsafe self-management, delayed care seeking, medication-related risks, or omission of clinically relevant warnings. Likert-scale responses were numerically coded for analysis only. Ratings reflected individual clinical judgment and were not intended to represent consensus-based guideline validation.</p>
        <p>Statements were evaluated using a 5-point Likert scale with verbally anchored response options. The scale comprised the categories “strongly disagree,” “disagree,” “neutral,” “agree,” and “strongly agree,” for which numerical labels were not displayed during the rating process. Consequently, reviewers were not exposed to numeric values during the assessment process. For the purpose of statistical analysis, the responses were subsequently encoded numerically on a scale ranging from 1 (strongly disagree) to 5 (strongly agree). This encoding step was performed exclusively during data analysis and did not influence the evaluation procedure itself.</p>
        <p>For each of the 50 questions, responses were evaluated along 3 distinct dimensions: perceived accuracy, perceived relevance, and perceived potential harm. The total number of evaluations completed by each expert amounted to 150, corresponding to 50 questions assessed across 3 distinct dimensions. To ensure data quality, given the high number of ratings, an instructed-response attention check item was included in the questionnaire as a separate multiple-choice question within the survey platform instructing participants to select “orange juice.” All reviewers responded correctly, indicating full task engagement.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>All analyses were conducted at the level of independent expert raters to avoid item-level pseudoreplication. Each evaluated response was uniquely assigned to one disease category and one content domain; thus, every statement simultaneously belonged to a specific disease and a specific content domain.</p>
        <p>For inferential comparisons across diseases and content domains, ratings were aggregated within each rater by calculating the mean Likert score across all responses belonging to the respective disease (or content domain) and rating dimension. Consequently, for each rating dimension, every rater contributed one aggregated value per disease and one aggregated value per content domain. Disease-specific analyses therefore reflect aggregation across content domains within each disease, whereas content-domain analyses reflect aggregation across diseases within each domain. These aggregated subject-level values served as the basis for statistical comparisons.</p>
        <p>Given the ordinal nature of Likert-scale data and the small number of raters, nonparametric methods were used throughout. For the Friedman test, mean scores were used for within-rater aggregation across items, representing a common approach in nonparametric repeated-measures analyses to enable ranking procedures. For each item, ratings from multiple experts were first averaged to obtain a single aggregated score per item. These aggregated scores were then used to calculate medians and IQRs across items. Differences across diseases and across content domains were explored using Friedman tests with the rater as the blocking factor.</p>
        <p>Where global tests suggested differences, pairwise Wilcoxon signed-rank tests with Bonferroni correction were applied. All tests were 2-sided, and a <italic>P</italic> value &#60;.05 was considered statistically significant. Given the exploratory design and limited number of raters, inferential statistics were interpreted cautiously and used to characterize patterns rather than to infer equivalence or definitive differences.</p>
        <p>For descriptive analyses at the statement level, ratings were summarized using medians and IQRs. These measures were used to characterize central tendency and dispersion of expert judgments across statements, diseases, and content domains. IQR values were reported descriptively to reflect the spread of expert ratings.</p>
        <p>To explore agreement among experts regarding the relative ordering of responses within specific strata, the Kendall coefficient of concordance (W) was calculated using the <italic>irr</italic> package in R (version 4.5.2; R Foundation for Statistical Computing). This implementation applies a tie-corrected formulation of Kendall W, which accounts for the large number of tied ranks inherent in Likert-scale data. Analyses were performed separately within predefined strata, each defined by a combination of disease, content domain, and rating dimension. Within each stratum, Likert-scale ratings were converted into ordinal ranks within each rater to derive the relative ordering of responses. Ties were handled using average ranks. Agreement analyses were restricted to strata containing at least 3 responses, acknowledging that concordance estimates based on very small item counts may be unstable. Kendall W values were interpreted descriptively and considered exploratory.</p>
        <p>Readability analyses were conducted on the full response texts prior to evaluation. Flesch Reading Ease and Flesch-Kincaid Grade Level scores were calculated using the <italic>quanteda</italic> package in R for each response and summarized descriptively using medians and IQRs.</p>
      </sec>
      <sec>
        <title>Statistical Software</title>
        <p>All analyses were performed using R within the RStudio integrated development environment (version 2025.09.2; Posit Software). Statistical analyses were conducted to explore differences in expert ratings across diseases and content domains.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The ethics committee of the Medical Faculty of Ludwig-Maximilians-Universität München confirmed that formal ethics approval was not required for this study (project 25-0855 KB; October 1, 2025), as it did not involve patients or patient data. Participation of expert reviewers was voluntary and anonymous and did not include any compensation; informed consent was obtained from all reviewers before participation. No personally identifying information was collected.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Expert Ratings of Perceived Accuracy, Perceived Relevance, and Perceived Potential Harm</title>
        <p>Across 50 statements rated by 10 geriatricians, LLM-generated statements were rated as highly accurate (median 4.32, IQR 4.01-4.59) and relevant (median 4.51, IQR 4.06-4.66), with low perceived potential harm (median 1.59, IQR 1.17-1.92; <xref rid="figure2" ref-type="fig">Figure 2</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Distribution of expert ratings for perceived accuracy, perceived relevance, and perceived potential harm across all statements. Boxplots summarize statement-level ratings aggregated across raters, displaying the median and IQR; whiskers represent 1.5×IQR. Ratings are based on 5-point Likert scales (1=strongly disagree; 5=strongly agree).</p>
          </caption>
          <graphic xlink:href="ai_v5i1e91369_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>When stratified by disease, expert ratings demonstrated consistently high perceived accuracy and perceived relevance across sarcopenia, osteoporosis, urinary incontinence, depression, and dementia, accompanied by low perceived potential harm (<xref ref-type="table" rid="table1">Table 1</xref>). Median ratings for perceived accuracy and perceived relevance were uniformly located in the upper range of the Likert scale across all conditions, with overlapping IQRs. Perceived potential harm ratings remained low for all diseases and showed limited dispersion, indicating broadly consistent expert perceptions of response quality and safety across disease domains.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Expert ratings of perceived accuracy, perceived relevance, and perceived potential harm stratified by disease (sarcopenia, osteoporosis, urinary incontinence, depression, and dementia)<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="260"/>
            <col width="260"/>
            <col width="280"/>
            <thead>
              <tr valign="top">
                <td>Disease</td>
                <td>Perceived accuracy, median (IQR)</td>
                <td>Perceived relevance, median (IQR)</td>
                <td>Perceived potential harm, median (IQR)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Sarcopenia</td>
                <td>4.25 (3.94-4.82)</td>
                <td>4.57 (4.16-4.76)</td>
                <td>1.10 (1.05-1.95)</td>
              </tr>
              <tr valign="top">
                <td>Osteoporosis</td>
                <td>4.35 (4.12-4.54)</td>
                <td>4.42 (4.23-4.59)</td>
                <td>1.53 (1.12-1.98)</td>
              </tr>
              <tr valign="top">
                <td>Urinary incontinence</td>
                <td>4.43 (4.07-4.47)</td>
                <td>4.60 (4.18-4.84)</td>
                <td>1.70 (1.02-2.00)</td>
              </tr>
              <tr valign="top">
                <td>Depression</td>
                <td>4.28 (4.03-4.58)</td>
                <td>4.23 (4.05-4.71)</td>
                <td>1.67 (1.08-2.00)</td>
              </tr>
              <tr valign="top">
                <td>Dementia</td>
                <td>4.20 (3.92-4.65)</td>
                <td>4.45 (4.05-4.77)</td>
                <td>1.35 (1.12-2.00)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Ratings are based on 5-point Likert scales and are summarized as median and IQR, reflecting the ordinal nature of the data. Values represent aggregated statement-level scores derived from expert ratings.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>When stratified by content domain, expert ratings showed consistently high perceived accuracy and perceived relevance across diagnostics, etiology, prognosis, risks of therapy, and therapy, accompanied by low perceived potential harm (<xref ref-type="table" rid="table2">Table 2</xref>). Median ratings for perceived accuracy and perceived relevance were generally highest for etiologic and prognostic statements, while perceived potential harm remained low across all domains, with relatively limited dispersion as reflected by the IQRs.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Expert ratings of perceived accuracy, perceived relevance, and perceived potential harm stratified by content domain (diagnostics, etiology, prognosis, risks of therapy, and therapy)<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="260"/>
            <col width="260"/>
            <col width="280"/>
            <thead>
              <tr valign="top">
                <td>Content domain</td>
                <td>Perceived accuracy, median (IQR)</td>
                <td>Perceived relevance, median (IQR)</td>
                <td>Perceived potential harm, median (IQR)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Diagnostics</td>
                <td>4.15 (3.70-4.47)</td>
                <td>4.55 (4.03-4.80)</td>
                <td>1.60 (1.15-1.87)</td>
              </tr>
              <tr valign="top">
                <td>Etiology</td>
                <td>4.48 (4.00-4.70)</td>
                <td>4.40 (4.03-4.72)</td>
                <td>1.42 (1.06-1.85)</td>
              </tr>
              <tr valign="top">
                <td>Prognosis</td>
                <td>4.60 (4.28-4.70)</td>
                <td>4.70 (4.55-4.97)</td>
                <td>1.45 (1.10-1.75)</td>
              </tr>
              <tr valign="top">
                <td>Risks of therapy</td>
                <td>4.23 (3.90-4.52)</td>
                <td>4.32 (4.01-4.64)</td>
                <td>1.55 (1.24-2.08)</td>
              </tr>
              <tr valign="top">
                <td>Therapy</td>
                <td>4.31 (4.10-4.54)</td>
                <td>4.32 (4.18-4.72)</td>
                <td>1.54 (1.19-1.91)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Ratings are based on 5-point Likert scales and are summarized as median and IQR, reflecting the ordinal nature of the data. Values represent statement-level scores aggregated across expert ratings.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Importantly, no statement received a combination of low perceived accuracy (median &#60;3) and high perceived potential harm (median &#62;3), indicating that statements judged as less accurate were not simultaneously perceived as harmful.</p>
      </sec>
      <sec>
        <title>Exploratory Group Comparisons</title>
        <p>Exploratory comparisons across diseases and content domains were conducted using Friedman tests, followed by pairwise Wilcoxon signed-rank tests with Bonferroni correction where applicable.</p>
        <p>No statistically significant differences were observed between diseases for perceived accuracy, perceived relevance, or perceived potential harm (all adjusted <italic>P</italic> values &#62;.99).</p>
        <p>The global Friedman test indicated differences across content domains for perceived accuracy (<italic>χ</italic>²<sub>4</sub>=10.2; <italic>P</italic>=.03) and perceived relevance (<italic>χ</italic>²<sub>4</sub>=11.5; <italic>P</italic>=.02). However, these differences did not remain statistically significant after Bonferroni correction in pairwise comparisons. Given the limited number of expert raters, these analyses should be interpreted cautiously. The absence of statistically significant post hoc differences does not imply equivalence but reflects limited statistical power under multiple-testing correction.</p>
      </sec>
      <sec>
        <title>Uncertainty of Expert Ratings (IQR-Based Analysis)</title>
        <p>An IQR-based analysis was performed to describe the dispersion of expert ratings across statements beyond measures of central tendency. Overall, IQR values were generally low to moderate across most disease and content domains (range 0.00-1.38), indicating limited variability in expert assessments (<xref rid="figure3" ref-type="fig">Figure 3</xref>).</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Heatmap depicting the dispersion of expert ratings based on the IQR of statement-level Likert ratings aggregated across raters. IQR values are shown across disease domains (sarcopenia, osteoporosis, urinary incontinence, depression, and dementia) and content domains (diagnostics, etiology, prognosis, risks of therapy, and therapy), stratified by rating dimension (perceived accuracy, perceived relevance, and perceived potential harm). Higher IQR values indicate greater dispersion in expert ratings, whereas lower values reflect more consistent assessments across statements.</p>
          </caption>
          <graphic xlink:href="ai_v5i1e91369_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Higher IQR values were observed in selected content domains. In particular, therapeutic statements related to osteoporosis showed greater dispersion in perceived potential harm ratings, as did risk-related statements in sarcopenia and osteoporosis. Additionally, moderate variability was observed for several diagnostic, etiological, and prognostic statements in dementia.</p>
        <p>These descriptive patterns indicate that variability in expert ratings differed across content domains and statement types rather than being uniformly associated with specific diseases. Domains involving therapeutic decisions and risk appraisal tended to show greater dispersion, which may reflect heterogeneity in expert judgments.</p>
      </sec>
      <sec>
        <title>Agreement About Relative Ranking of Statements (Kendall W)</title>
        <p>The Kendall coefficient of concordance (W) was calculated to explore agreement in the relative ordering of statements within the disease–content domain–dimension strata containing at least 3 statements. Of the 75 possible strata, 12 met the minimum requirement of 3 statements and were included in the analysis, while 63 strata were excluded due to insufficient item counts.</p>
        <p>Across the 12 eligible strata, W values ranged from 0.27 to 0.62 (median 0.53, IQR 0.46-0.58), indicating moderate concordance in the relative ranking of statements by experts (<xref ref-type="table" rid="table3">Table 3</xref>). Even in strata with lower W values, median ratings remained high (<xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Kendall coefficient of concordance (W) for all eligible disease–content domain–dimension strata containing at least 3 statements<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="0"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Rating dimension and disease (content domain)</td>
                <td>Kendall W (n statements)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Perceived accuracy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Depression (etiology)</td>
                <td colspan="2">0.406<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Osteoporosis (risks of therapy)</td>
                <td colspan="2">0.271<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sarcopenia (therapy)</td>
                <td colspan="2">0.525<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Urinary incontinence (therapy)</td>
                <td colspan="2">0.287<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Perceived potential harm</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Depression (etiology)</td>
                <td colspan="2">0.620<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Osteoporosis (risks of therapy)</td>
                <td colspan="2">0.568<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sarcopenia (therapy)</td>
                <td colspan="2">0.522<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Urinary incontinence (therapy)</td>
                <td colspan="2">0.623<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Perceived relevance</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Depression (etiology)</td>
                <td colspan="2">0.598<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Osteoporosis (risks of therapy)</td>
                <td colspan="2">0.549<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sarcopenia (therapy)</td>
                <td colspan="2">0.479<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Urinary incontinence (therapy)</td>
                <td colspan="2">0.542<sup>b</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Of the 75 possible strata, 12 met the minimum requirement for analysis (≥3 statements per stratum), while 63 strata were excluded due to insufficient item counts. W reflects agreement in the relative ordering of statements within each stratum and should be interpreted as exploratory, given the limited number of statements per group and the absence of a forced-ranking design.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>n=3.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>n=4.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Given the limited number of statements per stratum and the absence of a forced-ranking design, these findings should be interpreted as exploratory and descriptive rather than as definitive evidence of consensus or disagreement in clinical prioritization.</p>
      </sec>
      <sec>
        <title>Readability Analysis</title>
        <p>Readability analysis indicated that LLM-generated responses were written at a median Flesch-Kincaid Grade Level of 8.3 (IQR 7.4-9.6), corresponding to grades 8 to 9 in the US educational system. The median Flesch Reading Ease score was 60.8 (IQR 50.1-66.9), indicating generally accessible language with moderate variability across responses.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this exploratory expert rating study, geriatricians perceived LLM-generated patient information across 5 common geriatric conditions as largely accurate and relevant, with low potential harm when applied in typical informational contexts. Importantly, no statements combined low perceived accuracy with high perceived potential harm, suggesting that clearly misleading and dangerous content was not identified in the evaluated sample. Readability metrics suggested that the linguistic complexity of the responses was generally accessible for readers with secondary school literacy levels, although potentially challenging for individuals with limited health literacy. Nevertheless, variability in readability underscores the importance of considering health literacy and individual patient needs when deploying LLM-generated information in geriatric contexts [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        <p>The absence of statistically significant post hoc differences across diseases or content domains does not imply equivalence but reflects limited statistical power under multiple-testing correction. These results align with research in various medical disciplines that has similarly investigated perceived accuracy, relevance, and safety of AI-generated content [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. In contrast to the extant literature on AI-generated patient education, which is predominantly single-condition and often limited to global quality or readability scores in areas such as sarcopenia [<xref ref-type="bibr" rid="ref17">17</xref>], osteoporosis [<xref ref-type="bibr" rid="ref18">18</xref>], urinary incontinence [<xref ref-type="bibr" rid="ref19">19</xref>], depression [<xref ref-type="bibr" rid="ref20">20</xref>], and dementia [<xref ref-type="bibr" rid="ref21">21</xref>], our study provides a unified, cross-disease evaluation within a geriatric framework.</p>
      </sec>
      <sec>
        <title>Absolute Ratings Showed Limited Dispersion</title>
        <p>Beyond comparisons of central tendency, the analysis of IQRs provided a complementary perspective on the variability of expert ratings. Overall, dispersion of ratings was limited across most disease and content domains, indicating broadly consistent expert assessments. However, greater variability was observed in selected content domains, particularly for therapeutic statements and statements addressing risks of therapy in sarcopenia and osteoporosis, as well as for several diagnostic, etiological, and prognostic statements in dementia.</p>
        <p>Importantly, higher IQR values should not be interpreted as disagreement or lack of consensus but rather as reflecting heterogeneity in expert judgment. This pattern is clinically plausible, as therapeutic decision-making and risk appraisal often allow for a wider range of acceptable clinical perspectives, especially in complex geriatric contexts [<xref ref-type="bibr" rid="ref22">22</xref>]. In particular, therapeutic and risk-related statements related to osteoporosis demonstrated higher variability as clinical guidelines and practice patterns (eg, osteoporosis medication thresholds [<xref ref-type="bibr" rid="ref23">23</xref>]) vary, and clinical decision-making requires nuanced risk-benefit considerations [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]. Accordingly, the observed variability appeared more pronounced across content domains than across diseases.</p>
      </sec>
      <sec>
        <title>Moderate Concordance in the Relative Ranking of Statements</title>
        <p>The Kendall coefficient of concordance (W) was used to explore agreement in the relative ordering of statements within disease–content domain–dimension strata containing at least 3 statements. Of the 75 possible strata, only 12 fulfilled the minimum requirement for analysis, highlighting the limited item counts within many domain combinations. Across eligible strata, W values ranged from 0.27 to 0.62 (median 0.53, IQR 0.46-0.58), indicating moderate concordance in the relative ranking of statements [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        <p>These findings should be interpreted cautiously. First, the small number of statements per stratum (typically 3-4) limits the stability of concordance estimates. Second, ratings were based on Likert-scale assessments rather than a forced-ranking task; therefore, Kendall W captures consistency in relative ordering derived from ordinal ratings rather than explicit prioritization decisions. Importantly, even in strata with lower concordance, median ratings for perceived accuracy and perceived relevance remained high, and perceived potential harm remained low. Thus, variability in ranking does not necessarily imply substantive disagreement regarding the overall quality or safety of the generated responses [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      </sec>
      <sec>
        <title>Interpretation of Variability and Agreement</title>
        <p>A central methodological consideration of this study is the distinction between variability in absolute ratings and agreement in relative ordering. Narrow IQRs indicate that experts tended to provide similar absolute judgments, whereas the Kendall coefficient of concordance reflects whether experts agreed on which statements were perceived as more or less acceptable relative to others [<xref ref-type="bibr" rid="ref29">29</xref>]. These measures capture different dimensions of expert judgment and should not be interpreted interchangeably.</p>
        <p>Observed variation in Kendall W across domains suggests heterogeneity in expert emphasis rather than disagreement regarding content validity. In domains such as therapy and risk-related information, clinicians may legitimately differ in how strongly they weight caution, nuance, or contextualization, even when overall perceived accuracy remains high. Given that relevance was assessed using Likert scales rather than a forced-ranking methodology, these findings should be interpreted as differences in perceived emphasis rather than as direct evidence of clinical prioritization [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
      </sec>
      <sec>
        <title>Methodological Considerations</title>
        <p>This study relies on expert perception rather than objective verification against clinical guidelines or reference standards. Accordingly, the construct assessed here is <italic>perceived</italic> accuracy rather than <italic>factual</italic> correctness. This distinction is critical, as fluent and coherent LLM-generated text may receive favorable accuracy ratings despite containing subtle inaccuracies. Evidence from cognitive psychology indicates that processing fluency enhances perceived truthfulness, a phenomenon closely related to the illusory truth effect and fluency-based heuristics [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. While expert ratings provide valuable insight into clinician judgment, they cannot substitute for systematic guideline-based validation [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
        <p>Agreement analyses using Kendall W were conducted exploratorily and restricted to strata with a minimum number of statements. Nevertheless, the limited number of items within several strata constrains the stability of concordance estimates and warrants cautious interpretation. These analyses are best understood as descriptive signals of agreement patterns rather than definitive measures of consensus.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Several limitations should be considered. First, the expert panel was relatively small and geographically homogeneous, consisting exclusively of geriatricians from Germany, which may limit generalizability to other health care systems and guideline contexts. Second, the selection of statements was based on commonly encountered clinical questions, which likely biases the evaluation toward well-established and comparatively “safe” topics. Performance in rare, complex, or atypical scenarios, where LLM hallucinations may be more consequential, was not assessed [<xref ref-type="bibr" rid="ref10">10</xref>]. As question selection was guided by clinical relevance and frequency rather than equal representation across predefined domains, the resulting distribution of items was inherently unbalanced. This led to the exclusion of multiple strata from the Kendall coefficient of concordance analysis due to insufficient item counts.</p>
        <p>Third, although relevance was rated favorably by clinicians, readability metrics indicated that the linguistic complexity of responses may exceed the health literacy level of geriatric patients. This gap highlights the importance of integrating readability optimization and patient-centered evaluation in future assessments of LLM-generated medical information [<xref ref-type="bibr" rid="ref34">34</xref>]. Additionally, many generated responses were relatively short. As readability formulas such as the Flesch Reading Ease and Flesch-Kincaid Grade Level are sensitive to text length, their application to short text passages (eg, &#60;100 words) may yield unstable or less reliable estimates.</p>
        <p>Fourth, the study evaluated single-turn, zero-shot LLM responses and did not capture interactive dialogue, follow-up clarification, or longitudinal consistency, all of which are central to real-world patient information seeking. In addition, the limited number of responses within several disease–content domain strata restricted agreement analyses and reduced the stability of concordance estimates. Finally, relevance was assessed mostly from a clinician’s perspective; readability was assessed, but patient comprehension and usability were not evaluated and warrant future investigation [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
      </sec>
      <sec>
        <title>Implications and Future Directions</title>
        <p>From an AI evaluation perspective, these findings suggest that expert-based perception studies can serve as an initial filter for identifying potentially problematic content in patient-oriented LLM outputs. However, such assessments should be complemented by objective validation approaches, including guideline-based accuracy checks and studies incorporating patient-centered outcomes. Future work should also examine more complex clinical scenarios and interactive use cases to better reflect real-world deployment.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this exploratory expert rating study, LLM-generated patient information for common geriatric conditions was perceived as largely accurate and relevant, with low perceived potential harm in typical clinical scenarios. Variability in expert judgments primarily reflected differences in emphasis rather than overt disagreement on content validity. Given the perception-based nature of the assessment, limited item counts for agreement analyses, and the absence of objective reference standards, these findings should be interpreted cautiously. Future evaluations should combine expert ratings with guideline-based validation and patient-centered outcome measures to more comprehensively assess the safety and suitability of LLM-generated information for geriatric patient education.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary material including all geriatric patient questions with corresponding ChatGPT-generated responses and characteristics of expert reviewers.</p>
        <media xlink:href="ai_v5i1e91369_app1.docx" xlink:title="DOCX File, 19 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>ChatGPT was used to generate responses to predefined patient questions, which were subsequently evaluated by human experts. ChatGPT was not used for data analysis, interpretation, or manuscript writing. During the preparation of this work, the authors used ChatGPT to check grammar and spelling and improve readability and language. After using this tool, the authors reviewed and edited the content as needed and take full responsibility for the content of the published manuscript.</p>
    </ack>
    <notes>
      <sec>
        <title>Funding</title>
        <p>This research did not receive any specific grant from funding agencies in the public, commercial, or not-for-profit sectors.</p>
      </sec>
    </notes>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The datasets generated and analyzed during this study are available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>Conceptualization: MD, MR, SM, SS, UA</p>
        <p>Data curation: DN, KM, LD, MR, OT, SM, SS</p>
        <p>Investigation: SM, SS</p>
        <p>Methodology: DN, KM, SM, UA</p>
        <p>Supervision: MD, RS</p>
        <p>Writing–original draft: MD, RS, SM</p>
        <p>Writing–review and editing: DN, KM, LD, MD, MR, OT, RS, SM, SS, UA</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Inouye</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Studenski</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tinetti</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Kuchel</surname>
              <given-names>GA</given-names>
            </name>
          </person-group>
          <article-title>Geriatric syndromes: clinical, research, and policy implications of a core geriatric concept</article-title>
          <source>J Am Geriatr Soc</source>
          <year>2007</year>
          <month>05</month>
          <volume>55</volume>
          <issue>5</issue>
          <fpage>780</fpage>
          <lpage>91</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/17493201"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1532-5415.2007.01156.x</pub-id>
          <pub-id pub-id-type="medline">17493201</pub-id>
          <pub-id pub-id-type="pii">JGS1156</pub-id>
          <pub-id pub-id-type="pmcid">PMC2409147</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Medlock</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Eslami</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Askari</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Arts</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Sent</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>de Rooij</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Abu-Hanna</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Health information-seeking behavior of seniors who use the internet: a survey</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>01</month>
          <day>08</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>e10</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/1/e10/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.3749</pub-id>
          <pub-id pub-id-type="medline">25574815</pub-id>
          <pub-id pub-id-type="pii">v17i1e10</pub-id>
          <pub-id pub-id-type="pmcid">PMC4296102</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valizadeh-Haghi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rahmatizadeh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Soleimaninejad</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mousavi Shirazi</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Mollaei</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Are health websites credible enough for elderly self-education in the most prevalent elderly diseases?</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2021</year>
          <month>01</month>
          <day>28</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-021-01397-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-021-01397-x</pub-id>
          <pub-id pub-id-type="medline">33509183</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-021-01397-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC7842013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huo</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Boyle</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marfo</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tangamornsuksan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Steen</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>McKechnie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mayol</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Antoniou</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Thirunavukarasu</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sanger</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ramji</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Guyatt</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Large language models for chatbot health advice studies: a systematic review</article-title>
          <source>JAMA Netw Open</source>
          <year>2025</year>
          <month>02</month>
          <day>03</day>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>e2457879</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2024.57879"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.57879</pub-id>
          <pub-id pub-id-type="medline">39903463</pub-id>
          <pub-id pub-id-type="pii">2829839</pub-id>
          <pub-id pub-id-type="pmcid">PMC11795331</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yun</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Bickmore</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Online health information-seeking in the era of large language models: cross-sectional web-based survey study</article-title>
          <source>J Med Internet Res</source>
          <year>2025</year>
          <month>03</month>
          <day>31</day>
          <volume>27</volume>
          <fpage>e68560</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e68560/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/68560</pub-id>
          <pub-id pub-id-type="medline">40163112</pub-id>
          <pub-id pub-id-type="pii">v27i1e68560</pub-id>
          <pub-id pub-id-type="pmcid">PMC11997521</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wangmo</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bharadia</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed-Richards</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bhanderi</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Kachhadiya</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Allemann</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Elger</surname>
              <given-names>BS</given-names>
            </name>
          </person-group>
          <article-title>Generative AI/LLMs for plain language medical information for patients, caregivers and general public: opportunities, risks and ethics</article-title>
          <source>Patient Prefer Adherence</source>
          <year>2025</year>
          <month>07</month>
          <day>31</day>
          <volume>19</volume>
          <fpage>2227</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.2147/PPA.S527922"/>
          </comment>
          <pub-id pub-id-type="doi">10.2147/PPA.S527922</pub-id>
          <pub-id pub-id-type="medline">40771655</pub-id>
          <pub-id pub-id-type="pii">527922</pub-id>
          <pub-id pub-id-type="pmcid">PMC12325106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tangsrivimol</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Darzidehkalani</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Virk</surname>
              <given-names>HU</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Egger</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hacking</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Glicksberg</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Strauss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Krittanawong</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Benefits, limits, and risks of ChatGPT in medicine</article-title>
          <source>Front Artif Intell</source>
          <year>2025</year>
          <month>01</month>
          <day>30</day>
          <volume>8</volume>
          <fpage>1518049</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/frai.2025.1518049"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/frai.2025.1518049</pub-id>
          <pub-id pub-id-type="medline">39949509</pub-id>
          <pub-id pub-id-type="pmcid">PMC11821943</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Busser</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>De Loof</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The role of large language models in self-care: a study and benchmark on medicines and supplement guidance accuracy</article-title>
          <source>Int J Clin Pharm</source>
          <year>2025</year>
          <month>08</month>
          <volume>47</volume>
          <issue>4</issue>
          <fpage>1001</fpage>
          <lpage>10</lpage>
          <pub-id pub-id-type="doi">10.1007/s11096-024-01839-2</pub-id>
          <pub-id pub-id-type="medline">39644377</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11096-024-01839-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC12335388</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Avnat</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Herstain</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yanko</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ben Joya</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tzuchman Katz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Eshel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Laros</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dagan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Barami</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mermelstein</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ovadia</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shomron</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shalev</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Abdulnour</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <article-title>Performance of large language models in numerical versus semantic medical knowledge: cross-sectional benchmarking study on evidence-based questions and answers</article-title>
          <source>J Med Internet Res</source>
          <year>2025</year>
          <month>07</month>
          <day>14</day>
          <volume>27</volume>
          <fpage>e64452</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e64452/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/64452</pub-id>
          <pub-id pub-id-type="medline">40658983</pub-id>
          <pub-id pub-id-type="pii">v27i1e64452</pub-id>
          <pub-id pub-id-type="pmcid">PMC12279315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bedi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Orr-Ewing</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dash</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Koyejo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fries</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Wornow</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Swaminathan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lehmann</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kashyap</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chaurasia</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tazbaz</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Milstein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pfeffer</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Testing and evaluation of health care applications of large language models: a systematic review</article-title>
          <source>JAMA</source>
          <year>2025</year>
          <month>01</month>
          <day>28</day>
          <volume>333</volume>
          <issue>4</issue>
          <fpage>319</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2024.21700</pub-id>
          <pub-id pub-id-type="medline">39405325</pub-id>
          <pub-id pub-id-type="pii">2825147</pub-id>
          <pub-id pub-id-type="pmcid">PMC11480901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aghamaliyev</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Karimbayli</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zamparas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bösch</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Krautz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kahlert</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schölch</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Angele</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Niess</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Guba</surname>
              <given-names>MO</given-names>
            </name>
            <name name-style="western">
              <surname>Werner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ilmer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Renz</surname>
              <given-names>BW</given-names>
            </name>
          </person-group>
          <article-title>Bots in white coats: are large language models the future of patient education? A multicenter cross-sectional analysis</article-title>
          <source>Int J Surg</source>
          <year>2025</year>
          <month>03</month>
          <day>01</day>
          <volume>111</volume>
          <issue>3</issue>
          <fpage>2376</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1097/JS9.0000000000002250</pub-id>
          <pub-id pub-id-type="medline">39878073</pub-id>
          <pub-id pub-id-type="pii">01279778-990000000-02111</pub-id>
          <pub-id pub-id-type="pmcid">PMC12372709</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Majnarić</surname>
              <given-names>LT</given-names>
            </name>
            <name name-style="western">
              <surname>Babič</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>O'Sullivan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Holzinger</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>AI and Big Data in healthcare: towards a more comprehensive research framework for multimorbidity</article-title>
          <source>J Clin Med</source>
          <year>2021</year>
          <month>02</month>
          <day>14</day>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>766</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=jcm10040766"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/jcm10040766</pub-id>
          <pub-id pub-id-type="medline">33672914</pub-id>
          <pub-id pub-id-type="pii">jcm10040766</pub-id>
          <pub-id pub-id-type="pmcid">PMC7918668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mangio</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jayan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ben-Dekhil</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dao-Tran</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Dendere</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Machine learning in geriatric care: a scoping review of models using multidimensional assessment data</article-title>
          <source>Int J Med Inform</source>
          <year>2026</year>
          <month>03</month>
          <day>01</day>
          <volume>207</volume>
          <fpage>106181</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(25)00398-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2025.106181</pub-id>
          <pub-id pub-id-type="medline">41308275</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(25)00398-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Will</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zaretsky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dowlath</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Testa</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Enhancing the readability of online patient education materials using large language models: cross-sectional study</article-title>
          <source>J Med Internet Res</source>
          <year>2025</year>
          <month>06</month>
          <day>04</day>
          <volume>27</volume>
          <fpage>e69955</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e69955/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/69955</pub-id>
          <pub-id pub-id-type="medline">40465378</pub-id>
          <pub-id pub-id-type="pii">v27i1e69955</pub-id>
          <pub-id pub-id-type="pmcid">PMC12177420</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Performance of large language model in cross-specialty medical scenarios</article-title>
          <source>J Transl Med</source>
          <year>2025</year>
          <month>12</month>
          <day>22</day>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>211</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://translational-medicine.biomedcentral.com/articles/10.1186/s12967-025-07577-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12967-025-07577-x</pub-id>
          <pub-id pub-id-type="medline">41430263</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12967-025-07577-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC12903388</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Takita</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kabata</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Walston</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Tatekawa</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Saito</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tsujimoto</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Miki</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ueda</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A systematic review and meta-analysis of diagnostic performance comparison between generative AI and physicians</article-title>
          <source>NPJ Digit Med</source>
          <year>2025</year>
          <month>03</month>
          <day>22</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>175</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-025-01543-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-025-01543-z</pub-id>
          <pub-id pub-id-type="medline">40121370</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-025-01543-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC11929846</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karataş</surname>
              <given-names>Ö</given-names>
            </name>
            <name name-style="western">
              <surname>Demirci</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pota</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tuna</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Assessing ChatGPT's role in sarcopenia and nutrition: insights from a descriptive study on AI-driven solutions</article-title>
          <source>J Clin Med</source>
          <year>2025</year>
          <month>03</month>
          <day>05</day>
          <volume>14</volume>
          <issue>5</issue>
          <fpage>1747</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=jcm14051747"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/jcm14051747</pub-id>
          <pub-id pub-id-type="medline">40095876</pub-id>
          <pub-id pub-id-type="pii">jcm14051747</pub-id>
          <pub-id pub-id-type="pmcid">PMC11900272</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Erden</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Temel</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Bağcıer</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence insights into osteoporosis: assessing ChatGPT's information quality and readability</article-title>
          <source>Arch Osteoporos</source>
          <year>2024</year>
          <month>03</month>
          <day>19</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>17</fpage>
          <pub-id pub-id-type="doi">10.1007/s11657-024-01376-5</pub-id>
          <pub-id pub-id-type="medline">38499716</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11657-024-01376-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rotem</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zamstein</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Rottenstreich</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>O'Sullivan</surname>
              <given-names>OE</given-names>
            </name>
            <name name-style="western">
              <surname>O'Reilly</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Weintraub</surname>
              <given-names>AY</given-names>
            </name>
          </person-group>
          <article-title>The future of patient education: a study on AI-driven responses to urinary incontinence inquiries</article-title>
          <source>Int J Gynaecol Obstet</source>
          <year>2024</year>
          <month>12</month>
          <volume>167</volume>
          <issue>3</issue>
          <fpage>1004</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1002/ijgo.15751</pub-id>
          <pub-id pub-id-type="medline">38944693</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Attanasio</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Fazio</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Antonini</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lanzano</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Obumselu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Prato</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Flutti</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pacchioni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Fregna</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Franchini</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Colombo</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Evaluating ChatGPT-generated psychoeducation for mood disorders: comparative insights from patients and mental health professionals</article-title>
          <source>J Psychiatr Res</source>
          <year>2026</year>
          <month>02</month>
          <volume>193</volume>
          <fpage>354</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0022-3956(25)00742-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jpsychires.2025.11.039</pub-id>
          <pub-id pub-id-type="medline">41353963</pub-id>
          <pub-id pub-id-type="pii">S0022-3956(25)00742-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aguirre</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hilsabeck</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Assessing the quality of ChatGPT responses to dementia caregivers' questions: qualitative analysis</article-title>
          <source>JMIR Aging</source>
          <year>2024</year>
          <month>05</month>
          <day>06</day>
          <volume>7</volume>
          <fpage>e53019</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aging.jmir.org/2024/1/e53019/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/53019</pub-id>
          <pub-id pub-id-type="medline">38722219</pub-id>
          <pub-id pub-id-type="pii">v7i1e53019</pub-id>
          <pub-id pub-id-type="pmcid">PMC11089887</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Bruchem-Visser</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>van Dijk</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>de Beaufort</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Mattace-Raso</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Ethical frameworks for complex medical decision making in older patients: a narrative review</article-title>
          <source>Arch Gerontol Geriatr</source>
          <year>2020</year>
          <volume>90</volume>
          <fpage>104160</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0167-4943(20)30154-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.archger.2020.104160</pub-id>
          <pub-id pub-id-type="medline">32629372</pub-id>
          <pub-id pub-id-type="pii">S0167-4943(20)30154-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rentzeperi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pegiou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tsakiridis</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kalogiannidis</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kourtis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mamopoulos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Athanasiadis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dagklis</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Diagnosis and management of osteoporosis: a comprehensive review of guidelines</article-title>
          <source>Obstet Gynecol Surv</source>
          <year>2023</year>
          <month>11</month>
          <volume>78</volume>
          <issue>11</issue>
          <fpage>657</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1097/OGX.0000000000001181"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/OGX.0000000000001181</pub-id>
          <pub-id pub-id-type="medline">38134337</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cacciatore</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Calvani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Esposito</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Massaro</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gava</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Picca</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tosato</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Marzetti</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Landi</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Emerging targets and treatments for sarcopenia: a narrative review</article-title>
          <source>Nutrients</source>
          <year>2024</year>
          <month>09</month>
          <day>27</day>
          <volume>16</volume>
          <issue>19</issue>
          <fpage>3271</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=nu16193271"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/nu16193271</pub-id>
          <pub-id pub-id-type="medline">39408239</pub-id>
          <pub-id pub-id-type="pii">nu16193271</pub-id>
          <pub-id pub-id-type="pmcid">PMC11478655</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferreira</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Guimarães</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Araújo</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Anti-dementia drugs: what is the evidence in advanced stages?</article-title>
          <source>Porto Biomed J</source>
          <year>2024</year>
          <month>04</month>
          <day>29</day>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>251</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38690178"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/j.pbj.0000000000000251</pub-id>
          <pub-id pub-id-type="medline">38690178</pub-id>
          <pub-id pub-id-type="pii">PBJ-D-22-00032</pub-id>
          <pub-id pub-id-type="pmcid">PMC11060217</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shahzad</surname>
              <given-names>UB</given-names>
            </name>
            <name name-style="western">
              <surname>Hanif</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Aiman</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>From bisphosphonates to advanced therapies: a critical review of osteoporosis treatment strategies</article-title>
          <source>Osteoporos Int</source>
          <year>2025</year>
          <month>05</month>
          <volume>36</volume>
          <issue>5</issue>
          <fpage>933</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1007/s00198-025-07457-6</pub-id>
          <pub-id pub-id-type="medline">40108018</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00198-025-07457-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Legendre</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Species associations: the Kendall coefficient of concordance revisited</article-title>
          <source>J Agric Biol Environ Stat</source>
          <year>2005</year>
          <month>06</month>
          <volume>10</volume>
          <fpage>226</fpage>
          <lpage>45</lpage>
          <pub-id pub-id-type="doi">10.1198/108571105X46642</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Svensson</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Different ranking approaches defining association and agreement measures of paired ordinal data</article-title>
          <source>Stat Med</source>
          <year>2012</year>
          <month>11</month>
          <day>20</day>
          <volume>31</volume>
          <issue>26</issue>
          <fpage>3104</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1002/sim.5382</pub-id>
          <pub-id pub-id-type="medline">22714677</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schober</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mascha</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Vetter</surname>
              <given-names>TR</given-names>
            </name>
          </person-group>
          <article-title>Statistics from A (agreement) to Z (z score): a guide to interpreting common measures of association, agreement, diagnostic accuracy, effect size, heterogeneity, and reliability in medical research</article-title>
          <source>Anesth Analg</source>
          <year>2021</year>
          <month>12</month>
          <day>01</day>
          <volume>133</volume>
          <issue>6</issue>
          <fpage>1633</fpage>
          <lpage>41</lpage>
          <pub-id pub-id-type="doi">10.1213/ANE.0000000000005773</pub-id>
          <pub-id pub-id-type="medline">34633993</pub-id>
          <pub-id pub-id-type="pii">00000539-202112000-00032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tyumeneva</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sudorgina</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kislyonkova</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lebedeva</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Ordering motivation and Likert scale ratings: when a numeric scale is not necessarily better</article-title>
          <source>Front Psychol</source>
          <year>2022</year>
          <month>09</month>
          <day>23</day>
          <volume>13</volume>
          <fpage>942593</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36211886"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyg.2022.942593</pub-id>
          <pub-id pub-id-type="medline">36211886</pub-id>
          <pub-id pub-id-type="pmcid">PMC9539757</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bell</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nadarevic</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mieth</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Buchner</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The illusory-truth effect and its absence under accuracy-focused processing are robust across contexts of low and high advertising exposure</article-title>
          <source>Cogn Res Princ Implic</source>
          <year>2025</year>
          <month>05</month>
          <day>13</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>21</fpage>
          <pub-id pub-id-type="doi">10.1186/s41235-025-00628-3</pub-id>
          <pub-id pub-id-type="medline">40358856</pub-id>
          <pub-id pub-id-type="pii">10.1186/s41235-025-00628-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC12075062</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barber</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>The effects of repetition frequency on the illusory truth effect</article-title>
          <source>Cogn Res Princ Implic</source>
          <year>2021</year>
          <month>05</month>
          <day>13</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>38</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33983553"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s41235-021-00301-5</pub-id>
          <pub-id pub-id-type="medline">33983553</pub-id>
          <pub-id pub-id-type="pii">10.1186/s41235-021-00301-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC8116821</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meneses-Echavez</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Bidonde</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Montesinos-Guevara</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Amer</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Loaiza-Betancur</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Tellez Tinjaca</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Fraile Navarro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Poklepović Peričić</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tokalić</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bala</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Storman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Swierz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zając</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Flórez</surname>
              <given-names>ID</given-names>
            </name>
            <name name-style="western">
              <surname>Schünemann</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Flottorp</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Alonso-Coello</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Using evidence to decision frameworks led to guidelines of better quality and more credible and transparent recommendations</article-title>
          <source>J Clin Epidemiol</source>
          <year>2023</year>
          <month>10</month>
          <volume>162</volume>
          <fpage>38</fpage>
          <lpage>46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0895-4356(23)00185-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2023.07.013</pub-id>
          <pub-id pub-id-type="medline">37517506</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(23)00185-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Ballegooie</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hoang</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Assessment of the readability of online patient education material from major geriatric associations</article-title>
          <source>J Am Geriatr Soc</source>
          <year>2021</year>
          <month>04</month>
          <volume>69</volume>
          <issue>4</issue>
          <fpage>1051</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1111/jgs.16960</pub-id>
          <pub-id pub-id-type="medline">33236778</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Okuhara</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Furukawa</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Okada</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yokota</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kiuchi</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Readability of written information for patients across 30 years: a systematic review of systematic reviews</article-title>
          <source>Patient Educ Couns</source>
          <year>2025</year>
          <month>06</month>
          <volume>135</volume>
          <fpage>108656</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0738-3991(25)00023-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.pec.2025.108656</pub-id>
          <pub-id pub-id-type="medline">40068244</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(25)00023-0</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
