<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v5i1e82607</article-id><article-id pub-id-type="doi">10.2196/82607</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Assessment of the Modified Rankin Scale in Electronic Health Records With a Fine-Tuned Large Language Model: Development and Internal Validation</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Silva</surname><given-names>Luis</given-names></name><degrees>MD, MPH</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Milani</surname><given-names>Marcus</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Bindra</surname><given-names>Sohum</given-names></name><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib 
contrib-type="author"><name name-style="western"><surname>Ikramuddin</surname><given-names>Salman</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Tessmer</surname><given-names>Megan</given-names></name><degrees>BSN, RN</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Frederickson</surname><given-names>Kaylee</given-names></name><degrees>CCC-SLP</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Datta</surname><given-names>Abhigyan</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ergen</surname><given-names>Halil</given-names></name><degrees>PT, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Stangebye</surname><given-names>Alex</given-names></name><degrees>PT, DPT</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Cooper</surname><given-names>Dawson</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kumar</surname><given-names>Kompal</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yeung</surname><given-names>Jeremy</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lakshminarayan</surname><given-names>Kamakshi</given-names></name><degrees>MBBS, PhD</degrees><xref 
ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Streib</surname><given-names>Christopher</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Neurology, University of Florida</institution><addr-line>1600 SW Archer Road</addr-line><addr-line>Gainesville</addr-line><addr-line>FL</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Neurology, University of Minnesota</institution><addr-line>Minneapolis</addr-line><addr-line>MN</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Physical Therapy, Gaziantep University</institution><addr-line>Gaziantep</addr-line><country>Turkey</country></aff><aff id="aff4"><institution>Department of Physical Therapy, M Health Fairview</institution><addr-line>Minneapolis</addr-line><addr-line>MN</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Liu</surname><given-names>Hongfang</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Bhatnagar</surname><given-names>Priyanshi</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Adeniyi</surname><given-names>Samson</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Chen</surname><given-names>Yirui</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Luis Silva, MD, MPH, Department of Neurology, University of Florida, 1600 SW Archer Road, Gainesville, FL, 32608, United States, 1 7633373761; <email>luis.silva@neurology.ufl.edu</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date 
pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>25</day><month>2</month><year>2026</year></pub-date><volume>5</volume><elocation-id>e82607</elocation-id><history><date date-type="received"><day>18</day><month>08</month><year>2025</year></date><date date-type="rev-recd"><day>14</day><month>12</month><year>2025</year></date><date date-type="accepted"><day>18</day><month>01</month><year>2026</year></date></history><copyright-statement>&#x00A9; Luis Silva, Marcus Milani, Sohum Bindra, Salman Ikramuddin, Megan Tessmer, Kaylee Frederickson, Abhigyan Datta, Halil Ergen, Alex Stangebye, Dawson Cooper, Kompal Kumar, Jeremy Yeung, Kamakshi Lakshminarayan, Christopher Streib. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 25.2.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2026/1/e82607"/><abstract><sec><title>Background</title><p>The modified Rankin scale (mRS) is an important metric in stroke research, often used as a primary outcome in clinical trials and observational studies. 
The mRS can be assessed retrospectively from electronic health records (EHRs), but this process is labor-intensive and prone to interrater variability. Large language models (LLMs) have demonstrated potential in automating text classification.</p></sec><sec><title>Objective</title><p>We aimed to create a fine-tuned LLM that can analyze EHR text and classify mRS scores for clinical and research applications.</p></sec><sec sec-type="methods"><title>Methods</title><p>We performed a retrospective cohort study of patients admitted to a specialist stroke neurology service at a large academic hospital system between August 2020 and June 2023. Each patient&#x2019;s medical record was reviewed at two time points: (1) at hospital discharge and (2) approximately 90 days post discharge. Two independent researchers assigned an mRS score at each time point. Two separate models were trained on EHR passages with corresponding mRS scores as labeled outcomes: (1) a multiclass model to classify all seven mRS scores and (2) a binary model to classify functional independence (mRS scores 0&#x2010;2) versus non-independence (mRS scores 3&#x2010;6). Four-fold cross-validation was conducted using accuracy and the Cohen &#x03BA; as model performance metrics.</p></sec><sec sec-type="results"><title>Results</title><p>A total of 2290 EHR passages with corresponding mRS scores were included in model training. The multiclass model&#x2014;considering all seven scores of the mRS&#x2014;attained an accuracy of 77% and a weighted Cohen &#x03BA; of 0.92. Class-specific accuracy was the highest for mRS score 6 (99%) and the lowest for mRS score 2 (28%). 
The binary model&#x2014;considering only functional independence versus non-independence&#x2014;attained an accuracy of 92% and a Cohen &#x03BA; of 0.85.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our findings demonstrate that LLMs can be successfully trained to determine mRS scores through EHR text analysis; however, improving discrimination between intermediate scores is required.</p></sec></abstract><kwd-group><kwd>stroke</kwd><kwd>modified Rankin scale</kwd><kwd>artificial intelligence</kwd><kwd>large language model</kwd><kwd>machine learning</kwd><kwd>electronic health record</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The modified Rankin scale (mRS) is an important metric in stroke research, often used as a primary outcome in clinical trials and observational studies [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. The mRS is scored from 0 (no symptoms) to 6 (death), with higher scores indicating greater disability. It is determined based on a patient&#x2019;s stroke deficits and ability to perform daily activities [<xref ref-type="bibr" rid="ref3">3</xref>]. It has also been used in pivotal stroke trials as a binary outcome comparing functional independence (scores 0&#x2010;2) versus non-independence (scores 3&#x2010;6) [<xref ref-type="bibr" rid="ref4">4</xref>]. In most instances, trained clinicians or researchers collect the mRS score in real time. Alternatively, it can be assessed retrospectively from electronic health records (EHRs), but this process is labor-intensive and prone to interrater variability [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. 
These limitations constrain stroke research by making it dependent on the availability of trained research staff and preventing the use of existing clinical databases for research.</p><p>Large language models (LLMs), such as GPT-4 (OpenAI), are deep learning&#x2013;based models that perform well in text classification and generation [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Their use in medical research is expanding, with notable examples in neuroscience. GPT-4 has demonstrated 84% accuracy in localizing neurologic lesions and has also achieved a passing grade on the American Board of Psychiatry and Neurology examination [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. However, GPT-4 performed poorly when assessing the Glasgow Coma Scale, the Intracranial Hemorrhage score, and the Hunt and Hess classifications [<xref ref-type="bibr" rid="ref13">13</xref>]. These studies did not involve fine-tuning, a process in which the base model is trained on task-specific data to enhance performance. Classification of mRS scores from EHR text has been previously studied by Fernandez et al [<xref ref-type="bibr" rid="ref14">14</xref>] who, using a non-LLM model, achieved 59% accuracy, limiting its practical application in both clinical and research settings.</p><p>We hypothesize that a fine-tuned LLM can analyze EHR text from inpatient and outpatient settings and classify mRS scores for clinical and research applications. We aimed to develop a tool capable of streamlining observational stroke research and reducing reliance on trained research staff.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>We performed a retrospective cohort study of patients evaluated by a stroke neurology service at a large academic hospital system between August 2020 and June 2023. 
Each patient&#x2019;s medical record was reviewed at two time points: (1) at hospital discharge and (2) approximately 90 days post discharge, with follow-up notes selected within a window of 30 to 120 days. To minimize variability in follow-up timing, researchers were instructed to evaluate clinical notes recorded as close as possible to 90 days post discharge. If no appropriate notes were available, they gradually expanded the search window in both directions, extending to a final range of 30 to 120 days post discharge.</p><p>At each time point, two independent researchers&#x2014;trained and certified in mRS assessment&#x2014;assigned an mRS score. The mRS scoring followed the Rankin Focused Assessment, a structured checklist that standardizes patient evaluation through a question-answer format. Researchers assigned scores by answering predefined questions about functional status and stroke-related deficits [<xref ref-type="bibr" rid="ref15">15</xref>]. Discrepancies in mRS scoring were resolved through discussion. If no consensus was reached, a third reviewer adjudicated the case.</p><p>Additionally, researchers identified and collected one corresponding EHR passage from the clinical note deemed critical for determining the mRS score. EHR passages were short, verbatim, and continuous text excerpts from a single clinical note. These typically originated from a physician, nursing, occupational therapy, or physical therapy note (<xref ref-type="table" rid="table1">Table 1</xref>). To prevent data leakage, direct mentions of mRS scores could not be included. EHR passages collected by researchers were used in their original form, without additional preprocessing or normalization beyond tokenization. This process generated the study&#x2019;s observational unit: a paired EHR passage and mRS score. 
Because multiple, distinct EHR passages can support a single mRS score, each patient could contribute up to four EHR passage&#x2013;mRS score pairs to the dataset: two from the discharge summary and two from the follow-up visit (one from each researcher at each time point). If the two researchers initially disagreed on the mRS score before reaching consensus, only the correctly adjudicated mRS score&#x2013;EHR passage was included in the dataset. If a consensus could not be reached on the mRS score, both observational units were excluded from the analysis. <xref ref-type="fig" rid="figure1">Figure 1A</xref> illustrates EHR text collection, mRS scoring, and data inclusion decisions for model training.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Model training and evaluation data characteristics.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristics of the observational units</td><td align="left" valign="bottom">Discharge (n=1325)</td><td align="left" valign="bottom">Post discharge (n=966)</td></tr></thead><tbody><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age (y), median (IQR)</td><td align="left" valign="top">71 (60-81)</td><td align="left" valign="top">70 (59-80)</td></tr><tr><td align="left" valign="top">Stroke type, n (%)</td><td align="left" valign="top" colspan="2"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ischemic</td><td align="left" valign="top">1116 (84.2)</td><td align="left" valign="top">844 (87.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hemorrhagic</td><td align="left" valign="top">133 (10.0)</td><td align="left" valign="top">79 (8.2)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Not a stroke</td><td align="left" valign="top">75 (5.7)</td><td align="left" valign="top">42 (4.3)</td></tr><tr><td align="left" valign="top">Days from hospitalization to when the original note was written, median (IQR)</td><td align="left" valign="top">1 (1-3)</td><td align="left" valign="top">79 (56-103)</td></tr><tr><td align="left" valign="top">Profession of the provider whose text was used, n (%)</td><td align="left" valign="top" colspan="2"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Nurse</td><td align="left" valign="top">151 (11.4)</td><td align="left" valign="top">128 (13.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Occupational therapist</td><td align="left" valign="top">321 (24.2)</td><td align="left" valign="top">79 (8.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Physician</td><td align="left" valign="top">361 (27.2)</td><td align="left" valign="top">520 (53.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Physical therapist</td><td align="left" valign="top">398 (30.0)</td><td align="left" valign="top">79 (8.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other (eg, physician assistant, speech-language pathologist, or social worker)</td><td align="left" valign="top">84 (6.3)</td><td align="left" valign="top">155 (16.0)</td></tr><tr><td align="left" valign="top">mRS<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> score, n (%)</td><td align="left" valign="top" colspan="2"/></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>0</td><td align="left" valign="top">232 (17.5)</td><td align="left" valign="top">250 (25.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1</td><td align="left" valign="top">176 (13.3)</td><td align="left" valign="top">264 (27.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2</td><td align="left" valign="top">66 (5.0)</td><td align="left" valign="top">129 (13.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3</td><td align="left" valign="top">183 (13.8)</td><td align="left" valign="top">166 (17.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4</td><td align="left" valign="top">463 (34.9)</td><td align="left" valign="top">80 (8.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5</td><td align="left" valign="top">132 (10.0)</td><td align="left" valign="top">43 (4.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>6</td><td align="left" valign="top">73 (5.5)</td><td align="left" valign="top">34 (3.5)</td></tr><tr><td align="left" valign="top">Dichotomous mRS score, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Functionally independent (mRS scores 0-2)</td><td align="left" valign="top">474 (35.8)</td><td align="left" valign="top">643 (66.6)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Functionally dependent (mRS scores 3-6)</td><td align="left" valign="top">851 (64.2)</td><td align="left" valign="top">323 (33.4)</td></tr><tr><td align="left" valign="top">Confidence score, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5 - Answers a specific question of the Rankin Focused Assessment</td><td align="left" valign="top">442 (33.4)</td><td align="left" valign="top">206 (21.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4 - Does not answer a specific question, but almost certain</td><td align="left" valign="top">596 (45.0)</td><td align="left" valign="top">338 (35.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3 - Between two scores</td><td align="left" valign="top">258 (19.5)</td><td align="left" valign="top">324 (33.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2 - Between three scores</td><td align="left" valign="top">30 (2.3)</td><td align="left" valign="top">81 (8.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1 - Guess</td><td align="left" valign="top">2 (0.2)</td><td align="left" valign="top">12 (1.2)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>mRS: modified Rankin scale.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>(A) Illustrative example of the data inclusion and exclusion process for each potential observational unit of a hypothetical patient. 
(B) Actual data inclusion and exclusion for large language model (LLM) training within our study cohort. mRS: modified Rankin scale.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e82607_fig01.png"/></fig></sec><sec id="s2-2"><title>Model Development</title><p>To develop our model, we used GatorTron-Base [<xref ref-type="bibr" rid="ref16">16</xref>], an existing clinical LLM containing 345 million parameters that was pretrained on a corpus including deidentified clinical notes from the University of Florida Health System, deidentified clinical notes from MIMIC-III, and peer-reviewed medical research. This allows it to capture the nuances of EHR text, making it suitable for our research. This model was chosen based on its performance in early study data, compared with other publicly available EHR-based models. Details of this analysis are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>; the TRIPOD (Transparent Reporting of a Multivariable Prediction Model for individual Prognosis or Diagnosis) checklist is provided in <xref ref-type="supplementary-material" rid="app2">Checklist 1</xref>. All performance metrics were calculated as an average of four-fold cross-validation using 1000 bootstrapped samples to ensure reliable performance metrics. We fully fine-tuned GatorTron on our dataset to classify patients&#x2019; mRS scores based on the paired EHR passage. 
Fine-tuning is the process of further training a pretrained language model on a specific task&#x2014;in this case, classifying mRS scores from EHR passages&#x2014;to enable the model to adapt its general language understanding to the nuances of functional outcome assessment in stroke [<xref ref-type="bibr" rid="ref17">17</xref>].</p><p>Two separate models were trained: (1) a multiclass model to classify all seven mRS scores and (2) a binary model to classify functional independence (mRS scores 0&#x2010;2) versus non-independence (mRS scores 3&#x2010;6). Four-fold cross-validation was conducted, using accuracy and unweighted and weighted Cohen &#x03BA; as performance metrics.</p><p>Significant class imbalance was noted prior to model training; classes 2, 5, and 6 had less than 10% of the overall dataset, as seen in <xref ref-type="table" rid="table1">Table 1</xref>. Given this class imbalance, each mRS score was assigned training weights inversely proportional to its relative frequency in the overall data during model training to prevent overfitting.</p></sec><sec id="s2-3"><title>Ethical Considerations</title><p>This study was approved by the University of Minnesota&#x2014;Twin Cities Institutional Review Board (STUDY0001939). The requirement for informed consent was waived by the institutional review board. All study data were deidentified prior to analysis. Data were stored and managed using REDCap (Research Electronic Data Capture; Vanderbilt University).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>A total of 878 patients were evaluated, contributing 2290 observational units (EHR passage&#x2013;mRS score pairs) to LLM training. Observational units were distributed across two stages: discharge (n=1325) and 90 days post discharge (n=966). This represented 75.1% (1325/1765) of the potential observational units at discharge and 54.7% (966/1765) of the potential observational units from the post-discharge period. 
<xref ref-type="fig" rid="figure1">Figure 1B</xref> details the process of the EHR passage&#x2013;mRS score pairs&#x2019; inclusion and exclusion at each time point. The demographics were similar between groups, with a median (IQR) age of 71 (60&#x2010;81) years versus 70 (59&#x2010;80) years, ischemic stroke proportions of 84.2% (1116/1325) versus 87.4% (844/966), and hemorrhagic stroke proportions of 10.0% (133/1325) versus 8.2% (79/966) in the discharge and post-discharge groups, respectively (<xref ref-type="table" rid="table1">Table 1</xref>). The median (IQR) time from hospitalization to discharge was 1 (1&#x2010;3) day and that from hospitalization to follow-up was 79 (56&#x2010;103) days.</p><p>The two-class model, which combined the mRS scores into two categories (mRS scores 0&#x2010;2 vs mRS scores 3&#x2010;6), had an accuracy of 92% (95% CI 91%&#x2010;93%) and a Cohen &#x03BA; of 0.85 (95% CI 0.83&#x2010;0.87); the model&#x2019;s confusion matrix is shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>. The multiclass model, which included all seven categories of the mRS, achieved an accuracy of 77% (95% CI 76%&#x2010;79%), a Cohen &#x03BA; of 0.71 (95% CI 0.69&#x2010;0.73), and a weighted &#x03BA; of 0.92 (95% CI 0.90&#x2010;0.94). <xref ref-type="fig" rid="figure3">Figure 3</xref> represents the confusion matrix for this model and shows that the highest classification accuracy occurred for scores of 0 (90%) and 6 (99%). Misclassification was more common among intermediate scores, particularly scores of 2, where class accuracy was limited to 25%.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Confusion matrix of the binomial classification model as (A) absolute counts and (B) class accuracy. 
The model showed high accuracy for both classes.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e82607_fig02.png"/></fig><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Confusion matrix of the multiclass classification model as (A) absolute counts and (B) class accuracy. These patterns suggest less certainty in classes with moderate disability.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e82607_fig03.png"/></fig><p>Detailed performance metrics, including sensitivity (recall), specificity, positive predictive value (precision), and area under the receiver operating characteristic curve, are summarized in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>Greater variability was observed in the most common words for intermediate mRS scores versus extreme scores, which complicated their classification. This finding is illustrated by <xref ref-type="fig" rid="figure4">Figure 4</xref>, a heatmap of the most common words in each mRS stratum.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Heatmap of the most common words extracted in the electronic health record (EHR) passage for each class at (A) discharge and (B) long-term follow-up. 
Keywords associated with extreme classes, modified Rankin scale (mRS) scores 0&#x2010;1, and mRS scores 5&#x2010;6 had higher frequency of common words.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e82607_fig04.png"/></fig></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings in the Context of Previous Research</title><p>The mRS has become a standard outcome measure in stroke clinical trials due to its ability to capture significant levels of disability while remaining intuitive and less time-intensive to ascertain compared to more detailed metrics [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Automated tools capable of deriving mRS scores from EHRs could streamline clinical research and potentially enable large-scale analyses using existing databases that lack mRS outcome data.</p><p>Our multiclass model demonstrated an accuracy of 77%, marking a significant improvement over previously published machine learning models for mRS determination from the EHR, which reported an accuracy of 59% [<xref ref-type="bibr" rid="ref14">14</xref>]. Furthermore, our model also had a Cohen &#x03BA; of 0.71 and a weighted &#x03BA; of 0.92. The weighted &#x03BA;, which penalizes larger classification errors more heavily than smaller ones, further underscores the model&#x2019;s performance by indicating that most errors are near misses rather than substantial misclassifications. In a systematic review of stroke trials, human raters classified mRS scores with an average agreement accuracy of 73%, unweighted &#x03BA; of 0.62, and weighted &#x03BA; of 0.87 [<xref ref-type="bibr" rid="ref6">6</xref>]. 
This suggests our model performs comparably to human raters and could assist in real-world applications.</p><p>Our two-class model achieved an accuracy of 92% in differentiating patients who achieved functional independence (mRS scores 0&#x2010;2) from those with functional dependence (mRS scores 3&#x2010;6). This demonstrates that reducing the number of classes can significantly improve model performance, a key consideration depending on application.</p><p>We propose that LLMs could reduce the manual chart review burden in clinical trials and registries. For example, a current standard research practice is to have two researchers with mRS certification independently evaluate each case and then assess agreement, with further adjudication by a third researcher for ambiguous cases. Partnering a certified researcher with a validated LLM for the initial mRS determination would improve operational efficiency. Further advancements in LLMs could enable scalable, consistent mRS extraction from unstructured clinical text. High-end use case examples might include clinical decision support systems or integration into learning health systems to inform iterative, data-driven care pathways.</p><p>Although our findings hint at the vast potential of LLMs in clinical research and clinical care applications, our study also highlights important current limitations. During LLM training, class imbalance necessitated class weights inversely proportional to frequency. Although this may improve accuracy for less frequent mRS scores, such as mRS scores of 5 and 6, the classification of mRS scores of 2 remained challenging. Misclassification, especially between mRS scores of 2 and 3 (which distinguishes patients with functional independence vs non-independence), could be problematic in both research (eg, incorrect conclusions regarding efficacy) and clinical (eg, inappropriate discharge disposition) applications. 
However, this finding may be more reflective of the limitations inherent in the mRS than our LLM. In a study of 7374 patients, mRS scores of 0&#x2010;2 showed narrow variability in Longshi scale and Barthel Index scores, whereas mRS scores of 2&#x2010;4 exhibited much broader variability, suggesting poorer differentiation of moderate and severe disability states [<xref ref-type="bibr" rid="ref19">19</xref>]. In future applications, hierarchical classification of patients using a pipeline of our binary model followed by our multiclass model could be used to mitigate the misclassification of scores 2 and 3. We did not implement it here due to limitations in dataset size. In addition, our dataset only included cases where mRS score consensus was achieved and excluded cases where EHR notes were ambiguous. While this design helped maintain a reliable dataset, excluding cases with poor documentation compromises the generalizability of our model to real-world applications.</p></sec><sec id="s4-2"><title>Conclusions</title><p>While promising, these models should currently be viewed as research-support tools to assist data abstraction rather than as stand-alone clinical instruments. Continued validation on multicenter datasets will be essential before clinical deployment. With further advancements of artificial intelligence in stroke research, fully automating mRS scoring from unstructured clinical text could integrate real-time outcome metrics into learning health systems.</p><p>Our findings, though preliminary, support the continued validation, investigation, and integration of LLMs into medical research and clinical care. They demonstrate that a fine-tuned LLM using EHR passages accurately classified 77% of mRS scores in the multiclass model (mRS scores 0&#x2010;6) and 92% of mRS scores in the two-class model (stratifying mRS scores 0&#x2010;2 vs mRS scores 3&#x2010;6). 
The multiclass model had the most difficulty differentiating between mRS scores of 2 and 3.</p></sec></sec></body><back><ack><p>The authors would like to thank the Department of Neurology and the School of Public Health at the University of Minnesota for their invaluable efforts in making this research possible.</p><p>The authors acknowledge the use of ChatGPT (OpenAI) as a supportive tool to assist with the refinement of manuscript structure and aspects of code organization. All content was reviewed and verified by the authors.</p></ack><notes><sec><title>Funding</title><p>LS obtained a grant from the National Institutes of Health (NIH) National Institute of Neurological Disorders and Stroke StrokeNet fellowship. JY obtained the University of Minnesota Data Science Initiative Seed Grant. HE obtained funding from the Tubitak 2219 scholarship program. KL is supported by grant K24AG078506.</p></sec><sec><title>Data Availability</title><p>The datasets generated or analyzed during the current study are not publicly available due to the presence of protected health information in the electronic health records. Deidentified data and code may be made available by the corresponding author upon reasonable request and with appropriate institutional approvals.</p></sec></notes><fn-group><fn fn-type="con"><p>LS was responsible for conceptualization, database architecture, data collection, model fine-tuning, statistical analysis, and primary manuscript authorship. SB and MM contributed substantially to data collection and assisted with minor manuscript editing. SI participated in conceptualization and manuscript editing. MT, KF, AD, HE, AS, DC, and KK contributed significantly to data collection. JY provided support in machine learning conceptualization, coding assistance, and model training. 
KL and CS served as mentors and provided guidance across all stages of the project, including conceptualization, methodology, analysis, and manuscript preparation.</p></fn><fn fn-type="other"><label>Disclosures</label><p>The authors declare no conflicts of interest relevant to this study.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb2">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb3">mRS</term><def><p>modified Rankin scale</p></def></def-item><def-item><term id="abb4">REDCap</term><def><p>Research Electronic Data Capture</p></def></def-item><def-item><term id="abb5">TRIPOD</term><def><p>Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Abdalkader</surname><given-names>M</given-names> </name><name name-style="western"><surname>Siegler</surname><given-names>JE</given-names> </name><etal/></person-group><article-title>Mechanical thrombectomy for large ischemic stroke: a systematic review and meta-analysis</article-title><source>Neurology</source><year>2023</year><month>08</month><day>29</day><volume>101</volume><issue>9</issue><fpage>e922</fpage><lpage>e932</lpage><pub-id pub-id-type="doi">10.1212/WNL.0000000000207536</pub-id><pub-id pub-id-type="medline">37277200</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hendrix</surname><given-names>P</given-names> </name><name 
name-style="western"><surname>Collins</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Griessenauer</surname><given-names>CJ</given-names> </name><etal/></person-group><article-title>Tenecteplase versus alteplase before mechanical thrombectomy: experience from a US healthcare system undergoing a system-wide transition of primary thrombolytic</article-title><source>J Neurointerv Surg</source><year>2023</year><month>11</month><volume>15</volume><issue>e2</issue><fpage>e277</fpage><lpage>e281</lpage><pub-id pub-id-type="doi">10.1136/jnis-2022-019662</pub-id><pub-id pub-id-type="medline">36414389</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Broderick</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Adeoye</surname><given-names>O</given-names> </name><name name-style="western"><surname>Elm</surname><given-names>J</given-names> </name></person-group><article-title>Evolution of the modified Rankin scale and its use in future stroke trials</article-title><source>Stroke</source><year>2017</year><month>07</month><volume>48</volume><issue>7</issue><fpage>2007</fpage><lpage>2012</lpage><pub-id pub-id-type="doi">10.1161/STROKEAHA.117.017866</pub-id><pub-id pub-id-type="medline">28626052</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berkhemer</surname><given-names>OA</given-names> </name><name name-style="western"><surname>Fransen</surname><given-names>PSS</given-names> </name><name name-style="western"><surname>Beumer</surname><given-names>D</given-names> </name><etal/></person-group><article-title>A randomized trial of intraarterial treatment for acute ischemic stroke</article-title><source>N Engl J 
Med</source><year>2015</year><month>01</month><day>1</day><volume>372</volume><issue>1</issue><fpage>11</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.1056/NEJMoa1411587</pub-id><pub-id pub-id-type="medline">25517348</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sucharew</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kleindorfer</surname><given-names>D</given-names> </name><name name-style="western"><surname>Khoury</surname><given-names>JC</given-names> </name><etal/></person-group><article-title>Deriving place of residence, modified Rankin scale, and EuroQol-5D scores from the medical record for stroke survivors</article-title><source>Cerebrovasc Dis</source><year>2021</year><volume>50</volume><issue>5</issue><fpage>567</fpage><lpage>573</lpage><pub-id pub-id-type="doi">10.1159/000516571</pub-id><pub-id pub-id-type="medline">34107479</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quinn</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Dawson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Walters</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Lees</surname><given-names>KR</given-names> </name></person-group><article-title>Exploring the reliability of the modified Rankin scale</article-title><source>Stroke</source><year>2009</year><month>03</month><volume>40</volume><issue>3</issue><fpage>762</fpage><lpage>766</lpage><pub-id pub-id-type="doi">10.1161/STROKEAHA.108.522516</pub-id><pub-id pub-id-type="medline">19131664</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Banks</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Marotta</surname><given-names>CA</given-names> </name></person-group><article-title>Outcomes validity and reliability of the modified Rankin scale: implications for stroke clinical trials: a literature review and synthesis</article-title><source>Stroke</source><year>2007</year><month>03</month><volume>38</volume><issue>3</issue><fpage>1091</fpage><lpage>1096</lpage><pub-id pub-id-type="doi">10.1161/01.STR.0000258355.23810.c6</pub-id><pub-id pub-id-type="medline">17272767</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>H</given-names> </name><name name-style="western"><surname>Collier</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Quah</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Purvis</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bernhardt</surname><given-names>J</given-names> </name></person-group><article-title>The modified Rankin Scale in acute stroke has good inter-rater-reliability but questionable validity</article-title><source>Cerebrovasc Dis</source><year>2010</year><month>01</month><volume>29</volume><issue>2</issue><fpage>188</fpage><lpage>193</lpage><pub-id pub-id-type="doi">10.1159/000267278</pub-id><pub-id pub-id-type="medline">20029188</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Vaswani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shazeer</surname><given-names>N</given-names> </name><name name-style="western"><surname>Parmar</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Attention 
is all you need</article-title><source>arXiv</source><comment>Preprint posted online on Aug 2, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.1706.03762</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Achiam</surname><given-names>J</given-names> </name><name name-style="western"><surname>Adler</surname><given-names>S</given-names> </name><name name-style="western"><surname>Agarwal</surname><given-names>S</given-names> </name><etal/></person-group><article-title>GPT-4 technical report</article-title><source>arXiv</source><comment>Preprint posted online on Mar 4, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>E</given-names> </name><name name-style="western"><surname>McDougal</surname><given-names>R</given-names> </name><name name-style="western"><surname>Lytton</surname><given-names>WW</given-names> </name></person-group><article-title>GPT-4 performance for neurologic localization</article-title><source>Neurol Clin Pract</source><year>2024</year><month>06</month><volume>14</volume><issue>3</issue><fpage>e200293</fpage><pub-id pub-id-type="doi">10.1212/CPJ.0000000000200293</pub-id><pub-id pub-id-type="medline">38596779</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schubert</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Wick</surname><given-names>W</given-names> </name><name name-style="western"><surname>Venkataramani</surname><given-names>V</given-names> 
</name></person-group><article-title>Performance of large language models on a neurology board-style examination</article-title><source>JAMA Netw Open</source><year>2023</year><month>12</month><day>1</day><volume>6</volume><issue>12</issue><fpage>e2346721</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.46721</pub-id><pub-id pub-id-type="medline">38060223</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>TC</given-names> </name><name name-style="western"><surname>Kaminski</surname><given-names>E</given-names> </name><name name-style="western"><surname>Koduri</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Chat GPT as a neuro-score calculator: analysis of a large language model&#x2019;s performance on various neurological exam grading scales</article-title><source>World Neurosurg</source><year>2023</year><month>11</month><volume>179</volume><fpage>e342</fpage><lpage>e347</lpage><pub-id pub-id-type="doi">10.1016/j.wneu.2023.08.088</pub-id><pub-id pub-id-type="medline">37634667</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fernandes</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Valizadeh</surname><given-names>N</given-names> </name><name name-style="western"><surname>Alabsi</surname><given-names>HS</given-names> </name><etal/></person-group><article-title>Classification of neurologic outcomes from medical notes using natural language processing</article-title><source>Expert Syst Appl</source><year>2023</year><month>03</month><day>15</day><volume>214</volume><fpage>119171</fpage><pub-id pub-id-type="doi">10.1016/j.eswa.2022.119171</pub-id><pub-id 
pub-id-type="medline">36865787</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saver</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Filip</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hamilton</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Improving the reliability of stroke disability grading in clinical trials and clinical practice: the Rankin Focused Assessment (RFA)</article-title><source>Stroke</source><year>2010</year><month>05</month><volume>41</volume><issue>5</issue><fpage>992</fpage><lpage>995</lpage><pub-id pub-id-type="doi">10.1161/STROKEAHA.109.571364</pub-id><pub-id pub-id-type="medline">20360551</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>A</given-names> </name><name name-style="western"><surname>PourNejatian</surname><given-names>N</given-names> </name><etal/></person-group><article-title>GatorTron: a large clinical language model to unlock patient information from unstructured electronic health records</article-title><source>arXiv</source><comment>Preprint posted online on Dec 16, 2022</comment><pub-id pub-id-type="doi">10.48550/arXiv.2203.03540</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>A</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Weng</surname><given-names>C</given-names> </name></person-group><article-title>Fine-tuning large language models for rare disease concept normalization</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>09</month><day>1</day><volume>31</volume><issue>9</issue><fpage>2076</fpage><lpage>2083</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocae133</pub-id><pub-id pub-id-type="medline">38829731</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quinn</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Dawson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Walters</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Lees</surname><given-names>KR</given-names> </name></person-group><article-title>Functional outcome measures in contemporary stroke trials</article-title><source>Int J Stroke</source><year>2009</year><month>06</month><volume>4</volume><issue>3</issue><fpage>200</fpage><lpage>205</lpage><pub-id pub-id-type="doi">10.1111/j.1747-4949.2009.00271.x</pub-id><pub-id pub-id-type="medline">19659822</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kwon</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hartzema</surname><given-names>AG</given-names> </name><name name-style="western"><surname>Duncan</surname><given-names>PW</given-names> </name><name name-style="western"><surname>Min-Lai</surname><given-names>S</given-names> </name></person-group><article-title>Disability measures in stroke: relationship among the Barthel Index, the Functional Independence Measure, and the modified Rankin 
scale</article-title><source>Stroke</source><year>2004</year><month>04</month><volume>35</volume><issue>4</issue><fpage>918</fpage><lpage>923</lpage><pub-id pub-id-type="doi">10.1161/01.STR.0000119385.56094.32</pub-id><pub-id pub-id-type="medline">14976324</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Model information and detailed model results.</p><media xlink:href="ai_v5i1e82607_app1.docx" xlink:title="DOCX File, 446 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>The TRIPOD checklist.</p><media xlink:href="ai_v5i1e82607_app2.docx" xlink:title="DOCX File, 21 KB"/></supplementary-material></app-group></back></article>