<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR AI</journal-id>
      <journal-title>JMIR AI</journal-title>
      <issn pub-type="epub">2817-1705</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v5i1e88356</article-id>
      <article-id pub-id-type="pmid">42284574</article-id>
      <article-id pub-id-type="doi">10.2196/88356</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Knowledge-Augmented Large Language Model for Multimodal Electronic Health Record–Based Risk Prediction: Development and Validation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Zelko</surname>
            <given-names>Sofia</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Luo</surname>
            <given-names>Gang</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hou</surname>
            <given-names>Jiacheng</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chiu</surname>
            <given-names>Hung-Wen</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Datta</surname>
            <given-names>Rituparna</given-names>
          </name>
          <degrees>ME</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-1830-614X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Cui</surname>
            <given-names>Jiaming</given-names>
          </name>
          <degrees>PhD, Prof Dr</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2685-2776</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Guan</surname>
            <given-names>Zihan</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0331-3403</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Reddy</surname>
            <given-names>Vishal</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-3027-9678</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Eby</surname>
            <given-names>Joshua</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9683-1073</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Madden</surname>
            <given-names>Gregory R</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5951-3156</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Silwal</surname>
            <given-names>Rupesh</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4972-0602</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Vullikanti</surname>
            <given-names>Anil</given-names>
          </name>
          <degrees>PhD, Prof Dr</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>University of Virginia</institution>
            <addr-line>946 Grady Ave</addr-line>
            <addr-line>Charlottesville, VA, 22903</addr-line>
            <country>United States</country>
            <phone>1 5405773102</phone>
            <email>vsakumar@virginia.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8597-6197</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>University of Virginia</institution>
        <addr-line>Charlottesville, VA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Virginia Tech</institution>
        <addr-line>Blacksburg, VA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Medicine</institution>
        <institution>University of Virginia</institution>
        <addr-line>Charlottesville, VA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Medicine</institution>
        <institution>University of Virginia</institution>
        <addr-line>Charlottesville, VA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Anil Vullikanti <email>vsakumar@virginia.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>12</day>
        <month>6</month>
        <year>2026</year>
      </pub-date>
      <volume>5</volume>
      <elocation-id>e88356</elocation-id>
      <history>
        <date date-type="received">
          <day>24</day>
          <month>11</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>11</day>
          <month>2</month>
          <year>2026</year>
        </date>
        <date date-type="rev-recd">
          <day>6</day>
          <month>4</month>
          <year>2026</year>
        </date>
        <date date-type="accepted">
          <day>7</day>
          <month>4</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Rituparna Datta, Jiaming Cui, Zihan Guan, Vishal Reddy, Joshua Eby, Gregory R Madden, Rupesh Silwal, Anil Vullikanti. Originally published in JMIR AI (https://ai.jmir.org), 12.06.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on https://www.ai.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://ai.jmir.org/2026/1/e88356" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Accurate clinical outcome prediction using electronic health records (EHRs) is crucial for patient care and resource allocation. EHRs include both structured data and rich, unstructured clinical notes. However, prior machine learning methods struggle with the multimodality, long context of notes, and severe class imbalance in clinical tasks.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to introduce and evaluate KAMELEON (Knowledge-Augmented Multimodal EHR Learning for Outcome Prediction), a unified, 2-stage hybrid framework that integrates diverse EHR modalities and external biomedical knowledge to enhance clinical risk prediction.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study used the publicly available, deidentified Medical Information Mart for Intensive Care-III dataset, which includes structured and unstructured data for over 40,000 intensive care unit patients. The 2 tasks studied were 30-day readmission (403/10,031, 4% positive rate) and in-hospital mortality prediction (2423/17,903, 13% positive rate). Train-test splits were patient-disjoint (80:20). Performance was evaluated against general and medical large language models (LLMs) and structured baselines. Key metrics included the area under the receiver operating characteristic curve (AUROC), area under the precision-recall curve (AUPRC), and macro <italic>F</italic><sub>1</sub>-score.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The KAMELEON framework consistently outperformed all existing baselines. 30-day readmission: the KAMELEON-balanced random forests model achieved an AUROC of 0.85 and a sensitivity (recall) of 0.79. Ablation analysis shows the critical role of the LLM-generated reasoning, with its removal causing the AUROC to drop from 0.85 to 0.7 and sensitivity to fall by over 80%. In-hospital mortality: the KAMELEON-extreme gradient boosting model achieved an AUROC of 0.92 and an AUPRC of 0.650. Unstructured-only models showed limited ability to discern mortality, with AUROC values near chance (around 0.51-0.53).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>To our knowledge, KAMELEON represents one of the first systematic frameworks to enhance LLMs for health care prediction through graph-guided knowledge retrieval combined with structured machine learning. The framework demonstrates superior performance across both prediction tasks, highlighting the synergistic value of combining diverse data modalities and LLM reasoning for robust clinical risk estimation.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>biomedical knowledge graphs</kwd>
        <kwd>clinical risk prediction</kwd>
        <kwd>EHR</kwd>
        <kwd>electronic health records</kwd>
        <kwd>knowledge-augmented reasoning</kwd>
        <kwd>large language models</kwd>
        <kwd>machine learning</kwd>
        <kwd>multimodal data integration</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Appropriate use of clinical prediction tools for early identification of high-risk patients for different conditions allows for clinical decision-making, timely interventions, escalation of care, intensive monitoring, and identification of gaps in outpatient management [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. For instance, readmission within a short period is a priority under many regulatory frameworks and value-based care models, where high readmission rates may lead to financial penalties [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Therefore, effective models of short-term risk prediction (eg, 30 days) can guide targeted interventions, including more detailed discharge instructions, closer postdischarge monitoring, or referrals to transitional care programs. While traditional prediction tools relied on simple statistical models, for example, regression and decision trees [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], for risk assessment, more complex machine learning (ML) methods are increasingly applied to clinical prediction tasks [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. In this work, we focus on two commonly studied clinical problems: (1) 30-day readmission prediction, which determines whether a patient will be readmitted to the hospital within 30 days after discharge, and (2) mortality prediction, which determines the patient’s in-hospital mortality status.</p>
      <p>There has been a lot of work on developing diverse kinds of ML methods for these problems using electronic health record (EHR) data, which contain rich information on patient health [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. Most of this work focuses on structured EHR data, which includes admission/discharge information, procedures and interventions, medications, laboratory orders and results, billing codes (eg, <italic>ICD</italic> [<italic>International Classification of Diseases</italic>] and Current Procedural Terminology), and physiological time-series (eg, vital signs). While imaging data and clinical documentation (such as progress notes or discharge summaries) represent unstructured data sources, they have been underused in prior clinical prediction models or processed in overly simplified ways (eg, bag-of-words representations). Unstructured data from clinical notes have also been used in a fairly simple manner, such as bag of words or term frequency-inverse document frequency representations [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>], to facilitate the use of conventional ML methods for clinical tasks. Clinical notes are complex and poorly structured, which limits their use in clinical informatics tasks, even when using advanced natural language processing techniques. 
While large language models (LLMs) offer a powerful means to process such notes, especially when combined with large biomedical datasets to capture richer semantics beyond keywords and embeddings [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>], they still face significant limitations, such as hallucinations, factual inaccuracies, and inadequate domain grounding [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. For instance, models such as Med-PaLM [<xref ref-type="bibr" rid="ref23">23</xref>] exhibit strong language generation capabilities but frequently misinterpret similar-sounding medical terms. Recent approaches have attempted to enhance LLMs with structured knowledge via graph-based retrieval (eg, GraphRAG), but their performance remains limited due to a lack of explicit reasoning [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. In recent work, Jiang et al [<xref ref-type="bibr" rid="ref15">15</xref>] developed Knowledge Aware Reasoning-Enhanced HealthCare Prediction (KARE), a GraphRAG and context augmentation approach, for clinical prediction tasks, which addresses many challenges associated with using LLMs for clinical tasks on the Medical Information Mart for Intensive Care (MIMIC) dataset.</p>
      <p>However, the performance of all prior methods remains limited because clinical tasks using EHRs present many nontrivial challenges: (1) Multimodality of clinical data: the presence of both structured data and unstructured text requires methods capable of effectively learning from both modalities. (2) Long-context textual data: clinical text often contains a mix of specialized medical terminology and informal or colloquial expressions, making information retrieval challenging. (3) Severe class imbalance: prediction tasks are typically highly imbalanced. For example, only about 4% (403/10,031) of patients are readmitted within 30 days, resulting in heavily skewed training data.</p>
      <p>Here, we develop a novel framework, KAMELEON (Knowledge-Augmented Multimodal EHR Learning for Outcome Prediction), that addresses the limitations of prior work by integrating multimodal EHR data (including structured clinical components and unstructured physician notes) and external biomedical knowledge. We refer to our approach as a “knowledge-augmented LLM,” as its predictions and reasoning are systematically enriched using external biomedical knowledge retrieved from a domain-specific knowledge graph (KG) constructed from the Unified Medical Language System (UMLS), PubMed abstracts, and LLM-generated entity-relation triples. KAMELEON consists of 2 components (as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
      <list list-type="order">
        <list-item>
          <p>An unstructured model (<italic>M</italic><sub>1</sub>) that processes clinical notes and retrieves relevant biomedical knowledge using a PubMed-derived graph and knowledge-augmented reasoning, and outputs a prediction for a patient, along with its reasoning; this extends the approach of Jiang et al [<xref ref-type="bibr" rid="ref15">15</xref>]. Physician notes in EHRs can be lengthy and exceed the context window, and these are summarized using an LLM and used as context. To introduce domain-level medical knowledge, we build a biomedical KG by combining the UMLS [<xref ref-type="bibr" rid="ref28">28</xref>], PubMed abstracts, and LLM-generated entity-relation triples. The KG is partitioned into semantically coherent and well-connected clusters, and the textual summaries generated by an LLM for the clusters most relevant to each patient are used to enrich the context. Furthermore, labeled context is added by identifying semantically similar patient visits, which is used to fine-tune the LLM. Finally, <italic>M</italic><sub>1</sub> produces a prediction for the patient, along with its reasoning.</p>
        </list-item>
        <list-item>
          <p>A structured model (<italic>M</italic><sub>2</sub>) that extracts structured features from the patient’s EHR for the stay, including (1) static demographic and admission data, (2) time-varying vitals (which are normalized and summarized, when used as features), and (3) diagnoses, procedures, and medications. In addition, <italic>M</italic><sub>2</sub> includes the LLM’s prediction and its tokenized reasoning transformed into an embedding, as inputs. Finally, different kinds of standard ML methods are trained in <italic>M</italic><sub>2</sub> using these inputs. We first train <italic>M</italic><sub>1</sub> separately and use the LLM outputs to train <italic>M</italic><sub>2</sub>.</p>
        </list-item>
      </list>
      <p>We demonstrate the effectiveness of KAMELEON for the 30-day readmission risk and mortality prediction tasks, which have been studied extensively, both using MIMIC-III and other private EHRs from specific hospitals. We compare the performance with a number of structured ML and LLM baselines with respect to multiple metrics; KAMELEON consistently outperforms all prior work on MIMIC-III datasets. It also shows clear gains over the strongest unstructured LLM baseline (LLaMA3-Med42-8B). The only other prior work that has similar performance for 30-day readmission [<xref ref-type="bibr" rid="ref29">29</xref>] is on a Norwegian EHR dataset, which is significantly less imbalanced (5936/35,591, 16.7% readmission positive rate, instead of 403/10,031, 4% in the case of MIMIC-III; <xref ref-type="table" rid="table1">Table 1</xref>).</p>
      <p>In summary, KAMELEON is the first systematic framework to enhance the power of LLMs for health care prediction tasks through graph-guided knowledge retrieval combined with structured ML methods. We expect this framework to be readily applicable to other clinical questions beyond those examined in this study.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>A 2-stage hybrid framework for predictive tasks, integrating structured and unstructured patient data with large language models (LLMs). Step 1 (M1) focuses on knowledge-enhanced context generation with an initial LLM output, while Step 2 (M2) integrates the fine-tuned LLM outputs with structured data by creating an embedding for final machine learning prediction. KG: knowledge graph.</p>
        </caption>
        <graphic xlink:href="ai_v5i1e88356_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Dataset statistics for mortality and readmission prediction tasks. Positive denotes that the target outcome occurred.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="250"/>
          <col width="0"/>
          <col width="340"/>
          <col width="0"/>
          <col width="380"/>
          <thead>
            <tr valign="top">
              <td colspan="3">Task and split</td>
              <td colspan="2">Samples, n</td>
              <td>Positive, %</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="6">
                <bold>In-hospital mortality</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Train</td>
              <td colspan="2">17,903</td>
              <td colspan="2">13.53</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Test</td>
              <td colspan="2">3236</td>
              <td colspan="2">11.55</td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Readmission in 30 days</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Train</td>
              <td colspan="2">10,031</td>
              <td colspan="2">4.01</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Test</td>
              <td colspan="2">2425</td>
              <td colspan="2">3.80</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Background</title>
        <sec>
          <title>Clinical Datasets</title>
          <p>MIMIC-III data used in this study include both structured and unstructured data collected across patient visits, capturing the patient’s condition over time.</p>
          <list list-type="bullet">
            <list-item>
              <p>Structured data includes standardized fields such as laboratory results, vital signs, and demographic attributes (eg, age, sex, and ethnicity). These are typically numeric or categorical and readily usable for statistical modeling.</p>
            </list-item>
            <list-item>
              <p>Unstructured data consists of free-text clinical documentation such as physician notes, as well as patient conditions, diagnoses, and prescribed medications.</p>
            </list-item>
          </list>
          <p>Additional datasets include PubMed and the UMLS.</p>
        </sec>
        <sec>
          <title>Problem Statements</title>
          <p>To demonstrate the effectiveness of our method, we study 2 popular clinical tasks: in-hospital mortality prediction and 30-day readmission prediction [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. We define these problems formally after introducing some notation.</p>
        </sec>
      </sec>
      <sec>
        <title>Notation</title>
        <sec>
          <title>Overview</title>
          <p>We use <italic>v<sub>i</sub></italic> to denote a hospital visit by a patient. For each visit <italic>v<sub>i</sub></italic>, the patient is associated with a set of medical data, <italic>D<sub>i</sub></italic> = <italic>D<sub>i</sub></italic><sup>struct</sup> ∪ <italic>D<sub>i</sub></italic><sup>unstruct</sup>, comprising both structured information <italic>D<sub>i</sub></italic><sup>struct</sup> (eg, codes, vitals, and laboratory results) and unstructured information <italic>D<sub>i</sub></italic><sup>unstruct</sup> (eg, clinical free-text notes). Our goal is to build a model <italic>f<sub>θ</sub></italic> that predicts a patient’s target status based on their historical visit information, specifically, <italic>ŷ<sub>i</sub></italic> = <italic>f<sub>θ</sub></italic>(<italic>D<sub>i</sub></italic>).</p>
        </sec>
        <sec>
          <title>30-Day Readmission Problem</title>
          <p>The objective is to determine whether the patient is readmitted to the hospital within 30 days following discharge from visit <italic>v<sub>i</sub></italic>. We define the readmission indicator <italic>y<sup>readm</sup></italic> as follows:</p>
          <disp-formula>
            <graphic xlink:href="ai_v5i1e88356_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>The goal is to develop a predictive model that estimates <italic>y<sup>readm</sup></italic> using all structured and unstructured data from <italic>v<sub>i</sub></italic>.</p>
        </sec>
        <sec>
          <title>Mortality Prediction Problem</title>
          <p>Given the complete set of information for a visit <italic>v<sub>i</sub></italic>, the objective is to determine the patient’s in-hospital mortality status, denoted as <italic>y<sup>mort</sup></italic>, where:</p>
          <disp-formula>
            <graphic xlink:href="ai_v5i1e88356_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>The goal is to develop a predictive model that accurately estimates <italic>y<sup>mort</sup></italic> based on all available structured and unstructured clinical data associated with visit <italic>v<sub>i</sub></italic>.</p>
        </sec>
      </sec>
      <sec>
        <title>KAMELEON Framework</title>
        <sec>
          <title>Overview</title>
          <p>We propose a hybrid framework, KAMELEON, that integrates multimodal EHR data, including structured clinical components and unstructured physician notes, and external biomedical knowledge to predict 2 key clinical outcomes: in-hospital mortality and 30-day readmission. As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, KAMELEON consists of two components: (1) an unstructured encoder <italic>M</italic><sub>1</sub> that processes clinical notes and retrieves relevant biomedical knowledge using a PubMed-derived graph and knowledge-augmented reasoning, and outputs a prediction, along with its reasoning; and (2) a structured encoder <italic>M</italic><sub>2</sub> that combines multiple time-series corresponding to vitals and tabular datasets (laboratories, medications, etc), along with the outputs from <italic>M</italic><sub>1</sub> (ie, both the prediction and the embedding associated with the reasoning it produces) with static features for downstream prediction. The notations used in algorithms 1 and 2 are explained in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Summary of notation used in the framework.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="230"/>
              <col width="770"/>
              <thead>
                <tr valign="top">
                  <td>Symbol</td>
                  <td>Description</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td><italic>X</italic><sub>struct</sub></td>
                  <td>Structured clinical features</td>
                </tr>
                <tr valign="top">
                  <td><italic>X</italic><sub>unstruct</sub></td>
                  <td>Clinical free-text notes (eg, physician notes)</td>
                </tr>
                <tr valign="top">
                  <td><italic>X</italic><sub>demo</sub></td>
                  <td>Demographic information</td>
                </tr>
                <tr valign="top">
                  <td><italic>X</italic><sub>sim</sub></td>
                  <td>Embeddings of similar patient notes</td>
                </tr>
                <tr valign="top">
                  <td><italic>G</italic>, <italic>T</italic></td>
                  <td>Biomedical knowledge graph, triples</td>
                </tr>
                <tr valign="top">
                  <td><italic>H</italic><sub>text</sub></td>
                  <td>Unstructured text embedding</td>
                </tr>
                <tr valign="top">
                  <td><italic>H</italic><sub>KG</sub></td>
                  <td>Knowledge graph community summary embedding</td>
                </tr>
                <tr valign="top">
                  <td><italic>H</italic><sub>LLM reasoning</sub></td>
                  <td>LLM<sup>a</sup>-generated reasoning with context</td>
                </tr>
                <tr valign="top">
                  <td><italic>D</italic><sub>LLM</sub><sup>train/test</sup></td>
                  <td>Augmented LLM training/test inputs</td>
                </tr>
                <tr valign="top">
                  <td><italic>f</italic><sub>LLM</sub></td>
                  <td>Fine-tuned large language model</td>
                </tr>
                <tr valign="top">
                  <td><italic>ŷ</italic><sub>reasoning</sub>, <italic>ŷ</italic>′</td>
                  <td>LLM-generated textual reasoning, output label</td>
                </tr>
                <tr valign="top">
                  <td><italic>f</italic><sub>ML</sub></td>
                  <td>Final machine learning classifier</td>
                </tr>
                <tr valign="top">
                  <td><italic>H</italic><sub>concat</sub></td>
                  <td>Concatenated features for <italic>f</italic><sub>ML</sub></td>
                </tr>
                <tr valign="top">
                  <td><italic>ŷ</italic><sub>task</sub></td>
                  <td>Final binary classification output</td>
                </tr>
                <tr valign="top">
                  <td><italic>L</italic><sub>LLM</sub></td>
                  <td>LLM fine-tuning loss</td>
                </tr>
                <tr valign="top">
                  <td><italic>L</italic><sub>task</sub></td>
                  <td>Task-specific binary cross-entropy loss</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>LLM: large language model.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Unstructured Data Encoder (M1)</title>
          <sec>
            <title>Overview</title>
            <p>For each hospital visit <italic>v<sub>i</sub></italic>, we collect physician-authored clinical notes and extract entities like conditions, procedures, and medications. To enrich context, we use PubMed literature parsed into knowledge triples (subject-relation-object) via an LLM-based extraction pipeline. We retain only triples that appear across patient visits. These triples form a biomedical KG, serving as an auxiliary source to support LLM reasoning and diagnosis.</p>
          </sec>
          <sec>
            <title>Generating Context</title>
            <p>The first step of the framework is generating patient context. We use EHR data, including physicians’ notes, patient conditions, prescribed medications, and procedures in natural language. Since physician notes can be lengthy and exceed the context window of a small, locally fine-tuned LLM, we summarize them using an LLM. This approach addresses <italic>Challenge 2 (Long Context)</italic>.</p>
          </sec>
          <sec>
            <title>KG Retrieval</title>
            <p>To introduce domain-level medical knowledge, we build a biomedical KG by combining the UMLS [<xref ref-type="bibr" rid="ref28">28</xref>], PubMed abstracts, and LLM-generated entity-relation triples. UMLS provides standardized biomedical concepts and relationships, and the entity-relation triples are structured facts extracted by the LLM in the form (entity<sub>1</sub>, relation, entity<sub>2</sub>), capturing semantic connections. We apply the Leiden algorithm for community detection [<xref ref-type="bibr" rid="ref30">30</xref>], which partitions the KG into semantically coherent and well-connected subgraphs. After clustering, we use a separate LLM to generate a textual summary for each cluster (Figure S8 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). These summaries are produced by reasoning over the relationships among entities within each cluster, capturing the latent biomedical semantics encoded in the graph. Each summary serves as a high-level abstraction of the biomedical concepts and interactions within a subgraph. We embed these summaries using SentenceTransformer (all-MiniLM-L6-v2) [<xref ref-type="bibr" rid="ref31">31</xref>] and retrieve the most relevant ones for each patient by computing semantic similarity with the embedded patient context. This process directly addresses <italic>Challenge 3 (Specialized Medical Domains)</italic> by enriching patient context with structured, domain-specific knowledge, improving the model’s understanding of specialized medical terminology. <xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates a partial biomedical KG constructed from PubMed data, which is used to retrieve domain-relevant knowledge associated with each patient’s clinical context.</p>
            <fig id="figure2" position="float">
              <label>Figure 2</label>
              <caption>
                <p>A partial snapshot of a knowledge graph built from PubMed data, filtered to include only patient-related concepts.</p>
              </caption>
              <graphic xlink:href="ai_v5i1e88356_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Finding Similar Patients</title>
            <p>We provide additional context by retrieving semantically similar patient visits using precomputed visit-level embeddings and a similarity index built with the Facebook AI Similarity Search [<xref ref-type="bibr" rid="ref32">32</xref>] library, with inner-product search on L2-normalized embeddings, which is equivalent to cosine similarity. For each target patient visit, we retrieve the top 50 most similar patients while excluding self-matches and other visits from the same individual. Each retrieved patient is scored by similarity, and we filter them into positive and negative cohorts based on matching or nonmatching ground truth labels (eg, readmission vs no readmission). The final output includes the top-k positive and negative similar patients (with k=1 or 2). Unlike KARE [<xref ref-type="bibr" rid="ref15">15</xref>], we also provide the physician notes of the retrieved similar patients, enabling the language model to leverage more clinical context when assessing patient risk. To prevent data leakage, we maintain patient-level separation between training and testing sets. The similarity index was constructed exclusively using training-set patient embeddings. During testing, each test patient retrieved similar cases only from the training-set index.</p>
          </sec>
          <sec>
            <title>Reasoning Module</title>
            <p>In this module, we prepare inputs to fine-tune the LLM for clinical prediction. For each patient visit, we create a prompt that combines the patient’s context with the top-k most similar cases retrieved earlier. These similar cases guide the model by highlighting patterns in clinically comparable scenarios. We also add biomedical knowledge summaries from clustered subgraphs of a PubMed KG, providing literature-based context. Combining patient data, historical cases, and domain knowledge, we fine-tune the LLM to produce task-specific predictions with interpretable reasoning supporting each outcome.</p>
          </sec>
          <sec>
            <title>Fine-Tuning LLM</title>
            <p>We fine-tune a <italic>LLaMA-3 8B</italic> model using the Unsloth framework [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>], which enables memory-efficient training via 4-bit quantization and low-rank adaptation [<xref ref-type="bibr" rid="ref35">35</xref>]. Prediction tasks are framed as instruction-following using Alpaca-style prompts with task description, patient context, and optional justification. Each prompt combines clinical notes, retrieved similar cases, and biomedical knowledge summaries. The model is trained via supervised learning to generate both predictions and reasoning. We use limited training steps with gradient accumulation and sequence lengths up to 8192 tokens. Unlike KARE, which trains larger models with higher compute, our method uses smaller, quantized models to reduce computational cost while maintaining interpretability and performance. Algorithm 1 outlines the training and inference procedures of <italic>M</italic><sub>1</sub>. Additionally, an example prompt and its overall structure are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </sec>
        </sec>
        <sec>
          <title>Structured Data Encoder (M2)</title>
          <sec>
            <title>Overview</title>
            <p>We extract structured features from the patient’s visit history during each stay, including:</p>
            <list list-type="bullet">
              <list-item>
                <p>Time-varying variables: we extract hourly vitals and diagnoses in MIMIC-III, including heart rate, systolic and diastolic blood pressure, mean blood pressure, oxygen saturation, Glasgow Coma Scale scores, glucose level, respiratory rate, temperature, weight, and pH.</p>
              </list-item>
              <list-item>
                <p>Static metadata: demographic and admission features include gender, ethnicity, admission type, location, insurance, language, and religion.</p>
              </list-item>
              <list-item>
                <p>Diagnoses, procedures, and medications in <italic>ICD-9 (International Classification of Diseases, Ninth Revision)</italic> codes: <italic>ICD-9</italic> codes and drug names are encoded via one-hot vectors or counts. We compute binary indicators for key comorbidities (eg, sepsis, infection, organ failure, dementia, cancer, and diabetes).</p>
              </list-item>
            </list>
            <p>While both structured and unstructured models use information about conditions, medications, and procedures, they access this information from different data modalities. <italic>M</italic><sub>1</sub> processes the clinical narrative and reasoning about these elements while <italic>M</italic><sub>2</sub> processes the structured codes and standardized entries.</p>
          </sec>
          <sec>
            <title>Structured Data Preprocessing</title>
            <p>The structured data are first transformed using a discretization step to enforce uniform temporal resolution and impute missing values. This is followed by normalization using precomputed mean and SD statistics over the continuous variables.</p>
          </sec>
          <sec>
            <title>Incorporating LLM Output</title>
            <p>To augment the structured input, we include the LLM’s prediction and its tokenized reasoning. For each patient visit, <italic>M</italic><sub>1</sub> generates (1) a prediction probability and (2) a textual reasoning explanation. The reasoning text is embedded using SentenceTransformer (all-MiniLM-L6-v2) [<xref ref-type="bibr" rid="ref36">36</xref>], resulting in a 384-dimensional vector. This embedding, together with the scalar prediction probability, is concatenated with structured features and provided as input to the final classifier in <italic>M</italic><sub>2</sub>.</p>
          </sec>
          <sec>
            <title>Final Integration</title>
            <p>The LLM-derived vector is merged with structured input features to create a unified representation, directly addressing <italic>Challenge 1 (Multimodal Information)</italic>. To reduce dimensionality and suppress noise from high-dimensional embeddings, we apply principal component analysis to the combined feature vector.</p>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Training</title>
        <sec>
          <title>Overview</title>
          <p>We follow a 2-stage training procedure. First, we fine-tune the unstructured text encoder <italic>M</italic><sub>1</sub> using instruction-style prompts built from clinical notes, retrieved similar cases, and external biomedical knowledge. After fine-tuning, we perform <italic>final integration</italic> and use the outputs of <italic>M</italic><sub>1</sub> as input features to train <italic>M</italic><sub>2</sub> for final prediction.</p>
          <p>In our experiments, we benchmark several ML models for <italic>M</italic><sub>2</sub>, denoted KAMELEON-X, where X represents logistic regression, balanced random forests, long short-term memory, light gradient boosting machine (LightGBM), multilayer perceptron (MLP), or extreme gradient boosting (XGBoost), each selected for its effectiveness in capturing clinical patterns. These models were selected based on class imbalance severity: the extreme imbalance in 30-day readmission favors BalancedRandomForest and regularized logistic regression, whereas the moderate imbalance in in-hospital mortality is better suited to gradient boosting methods that capture complex feature interactions; multiple architectures were evaluated before selecting the final task-specific model. For MLP, we use weighted binary cross-entropy loss:</p>
          <disp-formula>
            <graphic xlink:href="ai_v5i1e88356_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>w</italic><sub>1</sub> and <italic>w</italic><sub>0</sub> are the positive and negative class weights, respectively, used to address class imbalance by giving more emphasis to the minority class. We further use the synthetic minority over-sampling technique [<xref ref-type="bibr" rid="ref37">37</xref>] to mitigate class imbalance, addressing <italic>Challenge 4 (Highly imbalanced data)</italic>.</p>
          <p>In addition, for gradient boosting models, we use the scale_pos_weight parameter to up-weight minority samples during training. These complementary strategies—class-weighted loss, balanced bootstrapping, oversampling, and cost-sensitive boosting—ensure that minority-class examples are not overwhelmed by the dominant negative class and are consistent with our task-specific model selection based on imbalance severity.</p>
          <p>The KAMELEON-balanced random forests (BalancedRF) model is designed to handle class imbalance. Here, each decision tree is trained on a bootstrapped sample drawn by undersampling the majority class and combining it with all minority class examples, ensuring balanced class proportions at the tree level. Each split is chosen to minimize the <italic>Gini impurity</italic>:</p>
          <disp-formula>
            <graphic xlink:href="ai_v5i1e88356_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
          <p>where <italic>p<sub>c</sub></italic> denotes the proportion of class <italic>c</italic> in a given node. The balanced sampling overcomes the issue with the dominance of the majority class and improves sensitivity to rare outcomes. Final predictions are obtained by aggregating probabilities across trees.</p>
          <p>The complete training and inference pipeline for <italic>M</italic><sub>2</sub> is outlined in algorithm 2.</p>
        </sec>
        <sec>
          <title>Algorithm 1. Unstructured Data Encoder (M1)</title>
          <p>Require: Training data: <inline-graphic xlink:href="ai_v5i1e88356_fig11.png" xlink:type="simple" mimetype="image"/></p>
          <p>Require: Test data: <inline-graphic xlink:href="ai_v5i1e88356_fig12.png" xlink:type="simple" mimetype="image"/></p>
          <p>Ensure: Intermediate prediction <inline-graphic xlink:href="ai_v5i1e88356_fig13.png" xlink:type="simple" mimetype="image"/>, Reasoning <inline-graphic xlink:href="ai_v5i1e88356_fig14.png" xlink:type="simple" mimetype="image"/></p>
          <sec>
            <title>Phase 1: Preprocessing and LLM Input Preparation</title>
            <disp-formula>
              <graphic xlink:href="ai_v5i1e88356_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
            <disp-formula>
              <graphic xlink:href="ai_v5i1e88356_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </sec>
          <sec>
            <title>Phase 2: Fine-Tuning LLM Model (fLLM)</title>
            <disp-formula>
              <graphic xlink:href="ai_v5i1e88356_fig17.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </sec>
          <sec>
            <title>Phase 3: Inference With Fine-Tuned LLM</title>
            <disp-formula>
              <graphic xlink:href="ai_v5i1e88356_fig18.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </sec>
        </sec>
        <sec>
          <title>Algorithm 2. Structured Data Encoder (M2)</title>
          <p>Require: Structured training data: <inline-graphic xlink:href="ai_v5i1e88356_fig19.png" xlink:type="simple" mimetype="image"/></p>
          <p>Require: Structured test data: <inline-graphic xlink:href="ai_v5i1e88356_fig20.png" xlink:type="simple" mimetype="image"/></p>
          <p>Require: From algorithm 1: <inline-graphic xlink:href="ai_v5i1e88356_fig21.png" xlink:type="simple" mimetype="image"/></p>
          <p>Ensure: Final prediction <inline-graphic xlink:href="ai_v5i1e88356_fig22.png" xlink:type="simple" mimetype="image"/></p>
          <sec>
            <title>Phase 1: Training Final ML Model (fML)</title>
            <disp-formula>
              <graphic xlink:href="ai_v5i1e88356_fig23.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
            <disp-formula>
              <graphic xlink:href="ai_v5i1e88356_fig24.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </sec>
          <sec>
            <title>Phase 2: Inference on Test Set</title>
            <disp-formula>
              <graphic xlink:href="ai_v5i1e88356_fig25.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study did not involve direct human participants. We used the MIMIC-III database, a publicly available, deidentified critical care dataset.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>We evaluate our model on the 2 clinical prediction tasks: in-hospital mortality and 30-day readmission. Our experiments compare performance against strong baselines, including general and medical LLMs, and traditional structured ML approaches. Given the severe class imbalance in 30-day readmission, we use balanced random forest (with internal bootstrap resampling) and regularized logistic regression (C = 1.0/0.01) to mitigate minority-class overfitting. In contrast, in-hospital mortality (2423/17,903, 13% positive rate) presents a moderate imbalance, for which gradient boosting methods (XGBoost and LightGBM) are better suited due to their ability to capture complex feature interactions. Balanced random forest is retained across tasks for consistency.</p>
      </sec>
      <sec>
        <title>Experimental Setup</title>
        <sec>
          <title>Datasets</title>
            <p>We use the MIMIC-III dataset [<xref ref-type="bibr" rid="ref38">38</xref>], which includes structured and unstructured data for over 40,000 intensive care unit patients. It includes structured data (demographics, vitals, laboratories, admissions, and <italic>ICD-9 Clinical Modification</italic> codes) and unstructured clinical text (physician notes, discharge summaries, and radiology reports). For this study, we focus on physician-authored notes containing clinical reasoning, assessments, and treatment plans. Here, we exclude discharge summaries and notes written after outcomes to prevent label leakage; otherwise, consistent with prior methods on MIMIC-III, we use the remaining structured and unstructured data recorded during the admission. Only 0.85% (1202/141,624) of notes mention hospice, indicating rare explicit terminal indicators. However, a limitation of MIMIC-III for the readmission task is the inability to distinguish planned or elective readmissions and interfacility transfers, which may inflate the count of “avoidable” readmissions.</p>
        </sec>
        <sec>
          <title>Biomedical Knowledge</title>
          <p>We use abstracts from the annual PubMed Baseline dataset, comprising over 36 million biomedical citation records, to build a medical KG that enriches LLM input and reduces hallucinations. We also incorporate UMLS [<xref ref-type="bibr" rid="ref28">28</xref>] to construct concept-centric subgraphs from EHR data.</p>
        </sec>
        <sec>
          <title>Dataset Statistics</title>
          <p>We include a summary table in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, showing dataset statistics, indicating moderate class imbalance for in-hospital mortality (~13% positive) and severe imbalance for 30-day readmission. To prevent data leakage, train-test splits (80:20) are patient disjoint, meaning that multiple visits from the same patient do not appear in both sets.</p>
        </sec>
        <sec>
          <title>Baselines</title>
          <p>We compare against Claude 3.7 Sonnet [<xref ref-type="bibr" rid="ref39">39</xref>], MedGemma [<xref ref-type="bibr" rid="ref40">40</xref>], LLaMA3-Med [<xref ref-type="bibr" rid="ref41">41</xref>], and KARE [<xref ref-type="bibr" rid="ref15">15</xref>], as well as structured-data models including logistic regression, tree-based models, and MLPs used in prior work on MIMIC-III [<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref44">44</xref>]. All LLMs are evaluated in a zero-shot setting with the same patient-context prompt. KARE uses a similar patient retrieval but lacks clinical notes. Our model incorporates retrieved notes for better context. Implementation details are in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        </sec>
        <sec>
          <title>Metrics</title>
          <p>We evaluate model performance using a comprehensive set of measures. Area under the receiver operating characteristic curve (AUROC) and area under the precision-recall curve (AUPRC) capture discrimination ability across thresholds, while overall accuracy reflects the proportion of correct predictions. For class-specific evaluation, we report precision (positive predictive value), recall (sensitivity), negative predictive value (NPV), and specificity. These capture both the model’s ability to correctly identify positive cases (sensitivity and positive predictive value) and its reliability on negatives (specificity and NPV). Finally, macro <italic>F</italic><sub>1</sub>-score balances precision and recall across classes, ensuring that performance on the minority positive class is not overshadowed by the majority class. All metrics are computed on a held-out test set using standardized preprocessing to ensure comparability across models.</p>
        </sec>
      </sec>
      <sec>
        <title>Performance of KAMELEON</title>
        <sec>
          <title>Overview</title>
          <p>We explore different kinds of standard ML methods in <italic>M</italic><sub>2</sub> for making the final prediction, using all the integrated inputs. We refer to the corresponding algorithm as KAMELEON-X, where X represents BalancedRF (balanced random forest), logistic regression, random forests, long short-term memory, LightGBM, MLP, or XGBoost.</p>
        </sec>
        <sec>
          <title>30-Day Readmission Prediction</title>
          <sec>
            <title>Overview</title>
            <p>Readmission within 30 days is a highly imbalanced task, with only about 4% (403/10,031) positive cases in the dataset. This severe imbalance is reflected in the results, where most models achieve high accuracy and precision on the negative class but struggle with recall for the positive (readmitted) class.</p>
            <p>As shown in <xref ref-type="table" rid="table3">Table 3</xref>, our framework with a balanced random forest (KAMELEON-BalancedRF) classifier achieves the highest AUROC (0.845) and notably improves recall on positive cases to 0.79, a crucial metric since identifying patients at risk of readmission is clinically imperative. The KAMELEON-MLP model, while achieving the highest overall accuracy (0.91) and macro <italic>F</italic><sub>1</sub> (0.58), still attains a sensitivity of 0.28 on positive cases, illustrating the persistent challenge in detecting rare events. Unstructured LLM-based baselines such as Claude-3.7-Sonnet, MedGemma, LLaMA3-Med, and KARE show substantially lower sensitivity for positives (below 0.3), suggesting that these models struggle to identify the minority class without further fine-tuning or domain-specific adaptation.</p>
            <p>In <xref ref-type="table" rid="table3">Table 3</xref>, KAMELEON-BalancedRF achieves a precision of 0.13, meaning that about 1 in 8 patients flagged as high-risk were actually readmitted. Recall (sensitivity) is 0.79, indicating that the model correctly identifies nearly 8 out of 10 true readmissions—a clinically critical result. The <italic>F</italic><sub>1</sub>-score of 0.55 reflects the balance between precision and recall. The NPV is 0.99, showing that almost all patients predicted as low risk were indeed not readmitted. Specificity is 0.80, meaning the model correctly classifies 8 in 10 patients who were not readmitted as low risk.</p>
            <p>To better understand this model’s behavior, we perform Shapley additive explanations (SHAP) analysis to identify feature importance, and <xref rid="figure3" ref-type="fig">Figure 3</xref> indicates that the model relies primarily on prediction embeddings (59.3%) and laboratory/vital features (40.4%) for predicting 30-day readmission, highlighting the importance of multimodal inputs.</p>
            <p>While prior studies like Morgan et al [<xref ref-type="bibr" rid="ref45">45</xref>] reported AUROCs up to 0.81 for readmission, and general models typically ranged from 0.61 to 0.73 [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], our multimodal approach effectively captures complex clinical nuances.</p>
            <table-wrap position="float" id="table3">
              <label>Table 3</label>
              <caption>
                <p>Comparison of models for 30-day readmission and in-hospital mortality prediction. Scores from Knowledge Aware Reasoning-Enhanced HealthCare Prediction (KARE) [<xref ref-type="bibr" rid="ref15">15</xref>] are reevaluated using our pipeline due to large language model differences and incorrect data preprocessing in their code. Reported metrics include accuracy, negative predictive value (NPV), precision (positive predictive value), sensitivity (recall), specificity, macro <italic>F</italic><sub>1</sub>, area under the receiver operating characteristic curve (AUROC), and area under the precision-recall curve (AUPRC).</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="30"/>
                <col width="120"/>
                <col width="0"/>
                <col width="90"/>
                <col width="0"/>
                <col width="100"/>
                <col width="0"/>
                <col width="90"/>
                <col width="0"/>
                <col width="60"/>
                <col width="0"/>
                <col width="90"/>
                <col width="0"/>
                <col width="90"/>
                <col width="0"/>
                <col width="90"/>
                <col width="0"/>
                <col width="80"/>
                <col width="0"/>
                <col width="80"/>
                <col width="0"/>
                <col width="80"/>
                <thead>
                  <tr valign="top">
                    <td colspan="3">Model</td>
                    <td colspan="2"><italic>D</italic><sub>struct</sub><sup>a</sup></td>
                    <td colspan="2"><italic>D</italic><sub>unstruct</sub><sup>b</sup></td>
                    <td colspan="2">Accuracy</td>
                    <td colspan="2">NPV</td>
                    <td colspan="2">Precision</td>
                    <td colspan="2">Specificity</td>
                    <td colspan="2">Sensitivity</td>
                    <td colspan="2">Macro <italic>F</italic><sub>1</sub></td>
                    <td colspan="2">AUROC</td>
                    <td>AUPRC</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td colspan="22">
                      <bold>Task: 30-day readmission prediction</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Logistic regression [<xref ref-type="bibr" rid="ref42">42</xref>]</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.831</td>
                    <td colspan="2">0.869</td>
                    <td colspan="2">0.036</td>
                    <td colspan="2">0.951</td>
                    <td colspan="2">0.013</td>
                    <td colspan="2">0.464</td>
                    <td colspan="2">0.463</td>
                    <td colspan="2">0.090</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>MLP<sup>c</sup> [<xref ref-type="bibr" rid="ref29">29</xref>]</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.828</td>
                    <td colspan="2">0.876</td>
                    <td colspan="2">0.182</td>
                    <td colspan="2">0.934</td>
                    <td colspan="2">0.100</td>
                    <td colspan="2">0.516</td>
                    <td colspan="2">0.559</td>
                    <td colspan="2">0.165</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>BalancedRF<sup>d</sup></td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.760</td>
                    <td colspan="2">0.970</td>
                    <td colspan="2">0.070</td>
                    <td colspan="2">0.780</td>
                    <td colspan="2">0.430</td>
                    <td colspan="2">0.490</td>
                    <td colspan="2">0.673</td>
                    <td colspan="2">0.066</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>LSTM<sup>e</sup> [<xref ref-type="bibr" rid="ref42">42</xref>]</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.820</td>
                    <td colspan="2">0.876</td>
                    <td colspan="2">0.163</td>
                    <td colspan="2">0.925</td>
                    <td colspan="2">0.100</td>
                    <td colspan="2">0.512</td>
                    <td colspan="2">0.569</td>
                    <td colspan="2">0.152</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Claude-3.7-Sonnet<sup>f</sup> [<xref ref-type="bibr" rid="ref39">39</xref>]</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.240</td>
                    <td colspan="2">0.790</td>
                    <td colspan="2">0.199</td>
                    <td colspan="2">0.068</td>
                    <td colspan="2">0.927</td>
                    <td colspan="2">0.227</td>
                    <td colspan="2">0.498</td>
                    <td colspan="2">0.199</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>MedGemma-4b-it<sup>f</sup> [<xref ref-type="bibr" rid="ref40">40</xref>]</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.350</td>
                    <td colspan="2">0.770</td>
                    <td colspan="2">0.190</td>
                    <td colspan="2">0.270</td>
                    <td colspan="2">0.690</td>
                    <td colspan="2">0.350</td>
                    <td colspan="2">0.480</td>
                    <td colspan="2">0.190</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>LLaMA3-Med42-8B<sup>f</sup> [<xref ref-type="bibr" rid="ref41">41</xref>]</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.390</td>
                    <td colspan="2">0.800</td>
                    <td colspan="2">0.210</td>
                    <td colspan="2">0.360</td>
                    <td colspan="2">0.670</td>
                    <td colspan="2">0.410</td>
                    <td colspan="2">0.510</td>
                    <td colspan="2">0.210<sup>g</sup></td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <italic>M</italic>
                      <sub>1</sub>
                      <sup>h</sup>
                    </td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.660</td>
                    <td colspan="2">0.870</td>
                    <td colspan="2">0.130</td>
                    <td colspan="2">0.720</td>
                    <td colspan="2">0.280</td>
                    <td colspan="2">0.480</td>
                    <td colspan="2">0.506</td>
                    <td colspan="2">0.195</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KARE [<xref ref-type="bibr" rid="ref15">15</xref>]</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.271</td>
                    <td colspan="2">0.785</td>
                    <td colspan="2">0.191</td>
                    <td colspan="2">0.131</td>
                    <td colspan="2">0.851</td>
                    <td colspan="2">0.269</td>
                    <td colspan="2">0.491</td>
                    <td colspan="2">0.191</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON<sup>i</sup>-LogReg<sup>j</sup> (C=1.0)</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.833</td>
                    <td colspan="2">0.869</td>
                    <td colspan="2">0.037</td>
                    <td colspan="2">0.953</td>
                    <td colspan="2">0.013</td>
                    <td colspan="2">0.519</td>
                    <td colspan="2">0.130</td>
                    <td colspan="2">0.148</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON-LogReg (C=0.01)</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.871</td>
                    <td colspan="2">0.874</td>
                    <td colspan="2">0.333<sup>g</sup></td>
                    <td colspan="2">0.996<sup>g</sup></td>
                    <td colspan="2">0.013</td>
                    <td colspan="2">0.478</td>
                    <td colspan="2">0.551</td>
                    <td colspan="2">0.152</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON-LSTM</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.840</td>
                    <td colspan="2">0.880</td>
                    <td colspan="2">0.190</td>
                    <td colspan="2">0.950</td>
                    <td colspan="2">0.090</td>
                    <td colspan="2">0.510</td>
                    <td colspan="2">0.505</td>
                    <td colspan="2">0.135</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON-BalancedRF</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.800</td>
                    <td colspan="2">0.990<sup>g</sup></td>
                    <td colspan="2">0.130</td>
                    <td colspan="2">0.800</td>
                    <td colspan="2">0.790<sup>g</sup></td>
                    <td colspan="2">0.550</td>
                    <td colspan="2">0.845<sup>g</sup></td>
                    <td colspan="2">0.150</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON-MLP</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.910<sup>g</sup></td>
                    <td colspan="2">0.970</td>
                    <td colspan="2">0.160</td>
                    <td colspan="2">0.940</td>
                    <td colspan="2">0.280</td>
                    <td colspan="2">0.580<sup>g</sup></td>
                    <td colspan="2">0.820</td>
                    <td colspan="2">0.138</td>
                  </tr>
                  <tr valign="top">
                    <td colspan="22">
                      <bold>Task:</bold>
                      <bold>in-hospital</bold>
                      <bold>mortality</bold>
                      <bold>prediction</bold>
                    </td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Logistic regression [<xref ref-type="bibr" rid="ref42">42</xref>]</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.850</td>
                    <td colspan="2">0.912</td>
                    <td colspan="2">0.340</td>
                    <td colspan="2">0.916</td>
                    <td colspan="2">0.331</td>
                    <td colspan="2">0.625</td>
                    <td colspan="2">0.624</td>
                    <td colspan="2">0.190</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>LSTM [<xref ref-type="bibr" rid="ref42">42</xref>]</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.690</td>
                    <td colspan="2">0.800</td>
                    <td colspan="2">0.260</td>
                    <td colspan="2">0.800</td>
                    <td colspan="2">0.250</td>
                    <td colspan="2">0.530</td>
                    <td colspan="2">0.560</td>
                    <td colspan="2">0.240</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>BalancedRF</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.810</td>
                    <td colspan="2">0.950</td>
                    <td colspan="2">0.340</td>
                    <td colspan="2">0.820</td>
                    <td colspan="2">0.700</td>
                    <td colspan="2">0.670</td>
                    <td colspan="2">0.860</td>
                    <td colspan="2">0.475</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>LightGBM<sup>k</sup> [<xref ref-type="bibr" rid="ref29">29</xref>]</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.890</td>
                    <td colspan="2">0.930</td>
                    <td colspan="2">0.510</td>
                    <td colspan="2">0.940</td>
                    <td colspan="2">0.480</td>
                    <td colspan="2">0.720</td>
                    <td colspan="2">0.866</td>
                    <td colspan="2">0.534</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>MLP [<xref ref-type="bibr" rid="ref29">29</xref>]</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.870</td>
                    <td colspan="2">0.920</td>
                    <td colspan="2">0.430</td>
                    <td colspan="2">0.920</td>
                    <td colspan="2">0.430</td>
                    <td colspan="2">0.680</td>
                    <td colspan="2">0.829</td>
                    <td colspan="2">0.426</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>XGBoost<sup>l</sup></td>
                    <td colspan="2">✓</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">0.890</td>
                    <td colspan="2">0.920</td>
                    <td colspan="2">0.520</td>
                    <td colspan="2">0.950</td>
                    <td colspan="2">0.380</td>
                    <td colspan="2">0.695</td>
                    <td colspan="2">0.835</td>
                    <td colspan="2">0.487</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>Claude-3.7-Sonnet<sup>f</sup> [<xref ref-type="bibr" rid="ref39">39</xref>]</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.800</td>
                    <td colspan="2">0.890</td>
                    <td colspan="2">0.120</td>
                    <td colspan="2">0.880</td>
                    <td colspan="2">0.130</td>
                    <td colspan="2">0.510</td>
                    <td colspan="2">0.510</td>
                    <td colspan="2">0.110</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>MedGemma-4b-it<sup>f</sup> [<xref ref-type="bibr" rid="ref40">40</xref>]</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.120</td>
                    <td colspan="2">0.950</td>
                    <td colspan="2">0.100</td>
                    <td colspan="2">0.020</td>
                    <td colspan="2">0.990</td>
                    <td colspan="2">0.120</td>
                    <td colspan="2">0.510</td>
                    <td colspan="2">0.110</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>LLaMA3-Med42-8B<sup>f</sup> [<xref ref-type="bibr" rid="ref41">41</xref>]</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.160</td>
                    <td colspan="2">0.950</td>
                    <td colspan="2">0.120</td>
                    <td colspan="2">0.100</td>
                    <td colspan="2">0.970</td>
                    <td colspan="2">0.190</td>
                    <td colspan="2">0.530</td>
                    <td colspan="2">0.120</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <italic>M</italic>
                      <sub>1</sub>
                    </td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.614</td>
                    <td colspan="2">0.890</td>
                    <td colspan="2">0.134</td>
                    <td colspan="2">0.641</td>
                    <td colspan="2">0.413</td>
                    <td colspan="2">0.474</td>
                    <td colspan="2">0.527</td>
                    <td colspan="2">0.125</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KARE [<xref ref-type="bibr" rid="ref15">15</xref>]</td>
                    <td colspan="2">
                      <break/>
                    </td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.639</td>
                    <td colspan="2">0.885</td>
                    <td colspan="2">0.129</td>
                    <td colspan="2">0.678</td>
                    <td colspan="2">0.353</td>
                    <td colspan="2">0.478</td>
                    <td colspan="2">0.515</td>
                    <td colspan="2">0.122</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON-BalancedRF</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.880</td>
                    <td colspan="2">0.930</td>
                    <td colspan="2">0.490</td>
                    <td colspan="2">0.934</td>
                    <td colspan="2">0.492</td>
                    <td colspan="2">0.710</td>
                    <td colspan="2">0.876</td>
                    <td colspan="2">0.543</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON-LSTM</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.730</td>
                    <td colspan="2">0.820</td>
                    <td colspan="2">0.430</td>
                    <td colspan="2">0.840</td>
                    <td colspan="2">0.390</td>
                    <td colspan="2">0.620</td>
                    <td colspan="2">0.740</td>
                    <td colspan="2">0.350</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON-LightGBM</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.880</td>
                    <td colspan="2">0.940<sup>g</sup></td>
                    <td colspan="2">0.470</td>
                    <td colspan="2">0.910</td>
                    <td colspan="2">0.590<sup>g</sup></td>
                    <td colspan="2">0.730</td>
                    <td colspan="2">0.890</td>
                    <td colspan="2">0.550</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON-MLP</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.900</td>
                    <td colspan="2">0.940<sup>g</sup></td>
                    <td colspan="2">0.550</td>
                    <td colspan="2">0.940</td>
                    <td colspan="2">0.550</td>
                    <td colspan="2">0.750<sup>g</sup></td>
                    <td colspan="2">0.890</td>
                    <td colspan="2">0.600</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>KAMELEON-XGBoost</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">✓</td>
                    <td colspan="2">0.920<sup>g</sup></td>
                    <td colspan="2">0.920</td>
                    <td colspan="2">0.790<sup>g</sup></td>
                    <td colspan="2">0.980<sup>g</sup></td>
                    <td colspan="2">0.369</td>
                    <td colspan="2">0.660</td>
                    <td colspan="2">0.920<sup>g</sup></td>
                    <td colspan="2">0.650<sup>g</sup></td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table3fn1">
                  <p><sup>a</sup><italic>D<sub>struct</sub></italic>: structured data.</p>
                </fn>
                <fn id="table3fn2">
                  <p><sup>b</sup><italic>D<sub>unstruct</sub></italic>: unstructured data.</p>
                </fn>
                <fn id="table3fn3">
                  <p><sup>c</sup>MLP: multilayer perceptron.</p>
                </fn>
                <fn id="table3fn4">
                  <p><sup>d</sup>BalancedRF: balanced random forests.</p>
                </fn>
                <fn id="table3fn5">
                  <p><sup>e</sup>LSTM: long short-term memory.</p>
                </fn>
                <fn id="table3fn6">
                  <p><sup>f</sup>Models are evaluated in a zero-shot setting without fine-tuning.</p>
                </fn>
                <fn id="table3fn7">
                  <p><sup>g</sup>Best-performing value.</p>
                </fn>
                <fn id="table3fn8">
                  <p><sup>h</sup><italic>M</italic><sub>1</sub>: unstructured data encoder.</p>
                </fn>
                <fn id="table3fn9">
                  <p><sup>i</sup>KAMELEON: Knowledge-Augmented Multimodal EHR Learning for Outcome Prediction.</p>
                </fn>
                <fn id="table3fn10">
                  <p><sup>j</sup>LogReg: logistic regression.</p>
                </fn>
                <fn id="table3fn11">
                  <p><sup>k</sup>LightGBM: light gradient boosting machine.</p>
                </fn>
                <fn id="table3fn12">
                  <p><sup>l</sup>XGBoost: extreme gradient boosting.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
            <fig id="figure3" position="float">
              <label>Figure 3</label>
              <caption>
                <p>(A) KAMELEON (Knowledge-Augmented Multimodal EHR Learning for Outcome Prediction) achieves the highest area under the curve (AUC) for 30-day readmission when combining all features of the unstructured model (<italic>M</italic><sub>1</sub>) and the structured model (<italic>M</italic><sub>2</sub>), outperforming ablation variants. (B) Shapley additive explanations (SHAP) analysis shows that prediction embeddings from <italic>M</italic><sub>1</sub> are key contributors.</p>
              </caption>
              <graphic xlink:href="ai_v5i1e88356_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Relative Importance of Different Classes of Inputs</title>
            <p>We conduct an ablation study for the readmission task, where we retrain KAMELEON after dropping different components—the reasoning and prediction outputs from <italic>M</italic><sub>1</sub> and the demographics used in <italic>M</italic><sub>2</sub> (<xref rid="figure3" ref-type="fig">Figure 3</xref> and <xref ref-type="table" rid="table4">Table 4</xref>). We find that the reasoning component output by <italic>M</italic><sub>1</sub> is highly influential and affects multiple metrics beyond AUROC. In the full model, KAMELEON achieves balanced performance (accuracy=0.80; macro <italic>F</italic><sub>1</sub>=0.55; AUROC=0.844; AUPRC=0.147) with both high specificity (0.80) and sensitivity (0.77). When we drop the reasoning component, sensitivity falls by over 90% (to 0.06), and AUPRC is nearly halved (to 0.078), revealing strong bias toward the majority class; however, there is a gain in accuracy (rising to 0.94). Removing the reasoning component also lowers AUROC from 0.844 to 0.699, a 17% decline, highlighting the critical role of the fine-tuned LLM’s reasoning in risk prediction. Removing both reasoning and prediction (outputs from <italic>M</italic><sub>1</sub>) causes intermediate degradation, with AUROC falling by ~18% (to 0.691), consistent with the loss of calibrated probability signals and semantic rationale. Eliminating reasoning together with demographics and prediction yields the sharpest overall decline, with macro <italic>F</italic><sub>1</sub> dropping by ~13% and AUROC by ~21% (to 0.663), confirming the complementary value of these components. These analyses collectively demonstrate that reasoning substantially improves minority-class detection: adding it raises sensitivity from 0.06 to 0.77 and nearly doubles AUPRC (from 0.078 to 0.147), while also preventing misleading accuracy gains driven solely by the dominant negative class. 
<xref rid="figure3" ref-type="fig">Figure 3</xref> visualizes these contributions, showing that reasoning, <italic>M</italic><sub>1</sub>’s prediction embedding, and demographics each add critical and nonredundant signals for accurate readmission risk estimation.</p>
            <table-wrap position="float" id="table4">
              <label>Table 4</label>
              <caption>
                <p>Ablation study on readmission.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="320"/>
                <col width="90"/>
                <col width="70"/>
                <col width="100"/>
                <col width="100"/>
                <col width="90"/>
                <col width="80"/>
                <col width="80"/>
                <col width="70"/>
                <thead>
                  <tr valign="top">
                    <td>Model</td>
                    <td>Accuracy</td>
                    <td>NPV<sup>a</sup></td>
                    <td>Precision (PPV<sup>b</sup>)</td>
                    <td>Specificity</td>
                    <td>Sensitivity</td>
                    <td>Macro <italic>F</italic><sub>1</sub></td>
                    <td>AUROC<sup>c</sup></td>
                    <td>AUPRC<sup>d</sup></td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>KAMELEON<sup>e</sup></td>
                    <td>0.80</td>
                    <td>0.99</td>
                    <td>0.13</td>
                    <td>0.80</td>
                    <td>0.77</td>
                    <td>0.55</td>
                    <td>0.844</td>
                    <td>0.147</td>
                  </tr>
                  <tr valign="top">
                    <td>Without reasoning<sub><italic>M1</italic></sub></td>
                    <td>0.94</td>
                    <td>0.96</td>
                    <td>0.07</td>
                    <td>0.97</td>
                    <td>0.06</td>
                    <td>0.52</td>
                    <td>0.699</td>
                    <td>0.078</td>
                  </tr>
                  <tr valign="top">
                    <td>Without demographic, reasoning<sub><italic>M1</italic></sub>, prediction<sub><italic>M1</italic></sub></td>
                    <td>0.73</td>
                    <td>0.97</td>
                    <td>0.06</td>
                    <td>0.74</td>
                    <td>0.45</td>
                    <td>0.48</td>
                    <td>0.663</td>
                    <td>0.061</td>
                  </tr>
                  <tr valign="top">
                    <td>Without reasoning<sub><italic>M1</italic></sub>, prediction<sub><italic>M1</italic></sub></td>
                    <td>0.74</td>
                    <td>0.97</td>
                    <td>0.09</td>
                    <td>0.75</td>
                    <td>0.55</td>
                    <td>0.50</td>
                    <td>0.691</td>
                    <td>0.062</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table4fn1">
                  <p><sup>a</sup>NPV: negative predictive value.</p>
                </fn>
                <fn id="table4fn2">
                  <p><sup>b</sup>PPV: positive predictive value.</p>
                </fn>
                <fn id="table4fn3">
                  <p><sup>c</sup>AUROC: area under the receiver operating characteristic curve.</p>
                </fn>
                <fn id="table4fn4">
                  <p><sup>d</sup>AUPRC: area under the precision-recall curve.</p>
                </fn>
                <fn id="table4fn5">
                  <p><sup>e</sup>KAMELEON: Knowledge-Augmented Multimodal EHR Learning for Outcome Prediction.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Sample Inference Output From M1</title>
            <p><xref ref-type="boxed-text" rid="box1">Textbox 1</xref> shows a sample output generated by the fine-tuned LLM for the 30-day hospital readmission prediction task. Instead of returning a raw binary value (0 or 1), the model is prompted to generate both a prediction and its reasoning based on the patient’s diagnoses, procedures, medications, and comparisons with similar cases. This structured explanation allows the model to ground its prediction in a clinical context, improving reliability and interpretability over naive classification. By incorporating rationale into the output, the LLM demonstrates better alignment with real-world clinical decision-making.</p>
            <boxed-text id="box1" position="float">
              <title>Inference results from a fine-tuned LLM for 30-day readmission prediction.</title>
              <p>
                <bold>Patient ID: 25070 0</bold>
              </p>
              <p>To predict whether Patient ID 25070 0 will be readmitted within 30 days, we analyze the provided EHR context, focusing on conditions, procedures, and medications.</p>
              <list list-type="order">
                <list-item>
                  <p>Conditions: The patient has multiple serious conditions, including non-Hodgkin’s lymphoma and complications from a device, which are significant risk factors for readmission. However, they also have other conditions that may not be as acute (eg, asthma and thyroid disorders).</p>
                </list-item>
                <list-item>
                  <p>Procedures: The patient has undergone several diagnostic procedures, including mastoidectomy, which could lead to complications but are not major surgeries. The patient did not have any major surgical interventions that typically increase readmission risk.</p>
                </list-item>
                <list-item>
                  <p>Medications: The patient is on a complex regimen including corticosteroids and other systemic medications. The presence of corticosteroids indicates a potential for complications, particularly in the management of their non-Hodgkin’s lymphoma.</p>
                </list-item>
                <list-item>
                  <p>Similar Patients: The similar patients show that those with chronic obstructive pulmonary disease and heart failure have higher readmission rates. However, Patient ID 25070 0 does not share the same chronic conditions with these patients.</p>
                </list-item>
                <list-item>
                  <p>Overall Assessment: Although the patient has several chronic and acute conditions, they do not have the same severity or complexity as the patients who have been readmitted. The absence of major recent surgeries and the complexity of their medication regimen suggest a lower risk of readmission.</p>
                </list-item>
              </list>
              <p>
                <bold># Prediction #</bold>
              </p>
              <p>0</p>
              <p>
                <bold>Patient ID: 93900 1</bold>
              </p>
              <p>To predict whether Patient ID: 93900 1 will be readmitted to the hospital within 30 days, we analyze the patient’s EHR context, focusing on conditions, procedures, and medications.</p>
              <list list-type="order">
                <list-item>
                  <p>Conditions: The patient has multiple chronic conditions, including congestive heart failure, diabetes mellitus with complications, and various gastrointestinal disorders. These conditions are known to increase the risk of exacerbations and complications, which are significant predictors of readmission.</p>
                </list-item>
                <list-item>
                  <p>Procedures: The patient has undergone significant procedures, such as debridement and upper gastrointestinal endoscopy. These procedures can lead to complications, necessitating close monitoring and follow-up care, which may contribute to readmission.</p>
                </list-item>
                <list-item>
                  <p>Medications: The patient is on a complex medication regimen, including beta-blockers and insulins. The complexity of this regimen can lead to adherence issues and potential side effects, which might complicate the patient’s health status and increase the likelihood of readmission.</p>
                </list-item>
                <list-item>
                  <p>Similar Patients: The similar patients, such as Patient ID: 16994 6, experienced complications from chronic diseases and were readmitted. This indicates a trend in patients with similar conditions and treatment regimens.</p>
                </list-item>
                <list-item>
                  <p>Overall Risk Assessment: The combination of chronic conditions, significant procedures, and complex medication regimens suggests a high risk of readmission due to potential exacerbations and complications.</p>
                </list-item>
              </list>
              <p>Given this comprehensive analysis, it is reasonable to conclude that Patient ID: 93900 1 will likely be readmitted to the hospital within 30 days based on the factors outlined above.</p>
            </boxed-text>
          </sec>
          <sec>
            <title>Analysis of Distribution (Readmission)</title>
            <p><xref rid="figure4" ref-type="fig">Figure 4</xref>A illustrates the distribution of predicted probabilities for the readmission task. In the full model, the majority of negatives cluster near zero, while positives are shifted upward and concentrated above a low threshold of ~0.16. This optimal threshold is chosen using the Youden J statistic, which maximizes the sum of sensitivity and specificity. This separation, although not perfectly distinct, reflects the extreme class imbalance in the data: a very low cutoff is required to recover a reasonable fraction of positive cases. The overlap between classes explains the modest AUPRC values, since many positives still lie in regions dominated by negatives. Nevertheless, compared to ablated variants, the full model achieves tighter grouping of positives in the higher-probability region, resulting in better sensitivity and precision-recall trade-offs.</p>
            <p>The t-distributed Stochastic Neighbor Embedding (t-SNE) plot (<xref rid="figure4" ref-type="fig">Figure 4</xref>B) further visualizes patients’ features on the held-out test set. In this space, readmitted patients do not form sharply separated clusters but instead appear partially embedded within the larger manifold of nonreadmitted cases. The lack of clear separation reflects the difficulty of the task and the subtlety of signals driving readmission, where positives and negatives overlap substantially. Still, there is evidence of localized groupings of readmitted patients, suggesting that the model captures some latent patterns that distinguish higher-risk subgroups. This partial clustering is consistent with the modest AUPRC values: while the model cannot fully disentangle the classes, it is able to concentrate a portion of true positives in regions of elevated probability. Clinically, this underscores the challenge of predicting readmission but also highlights the value of identifying even partially coherent patient subgroups for targeted follow-up.</p>
            <fig id="figure4" position="float">
              <label>Figure 4</label>
              <caption>
                <p>Visualization of class separability and predicted probability distributions for the 30-day readmission task. (A) Distribution of predicted probabilities for 30-day readmission, separated by true class labels. The vertical dashed line marks the optimal threshold (~0.16) balancing sensitivity and specificity. (B) T-distributed Stochastic Neighbor Embedding of the test dataset for the 30-day readmission task, showing Class 0 (blue) and Class 1 (red).</p>
              </caption>
              <graphic xlink:href="ai_v5i1e88356_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
        </sec>
        <sec>
          <title>In-Hospital Mortality Prediction</title>
          <sec>
            <title>Overview</title>
            <p>Mortality prediction is less imbalanced, with approximately 13% positive cases. <xref ref-type="table" rid="table3">Table 3</xref> reports that KAMELEON-XGBoost and KAMELEON-MLP models achieve high accuracy (0.92 and 0.90, respectively) and AUROC (0.92 and 0.89, respectively). KAMELEON-XGBoost demonstrates strong performance for mortality prediction, achieving a high precision of 0.79, meaning that most patients flagged as high risk did not survive. It also attains a specificity of 0.98 and an NPV of 0.92, indicating that nearly all patients predicted as low risk were indeed survivors. Furthermore, for a positive class that, while less imbalanced, is still a minority, the AUPRC is a vital metric. Here, KAMELEON sets the benchmark with an AUPRC of 0.650, significantly outperforming all other baselines. Unstructured models consistently yield the lowest performance for mortality prediction: AUROC values hover just above random chance (around 0.51-0.53), and AUPRCs remain very low (maximum 0.125), indicating a limited ability to discern between mortality and survival solely from clinical notes. SHAP results (<xref rid="figure5" ref-type="fig">Figure 5</xref>B) demonstrate that laboratory results and vital signs strongly drive predictions, with prediction embeddings playing a less critical role compared to readmission. Overall, <xref ref-type="table" rid="table3">Table 3</xref> shows that for both tasks, our multimodal model outperforms all individual structured and unstructured baselines across all metrics.</p>
            <fig id="figure5" position="float">
              <label>Figure 5</label>
              <caption>
                <p>Performance and feature importance for the mortality prediction task. (A) Area under the curve (AUC) for the mortality prediction task. (B) Importance of features for predicting mortality. KAMELEON: Knowledge-Augmented Multimodal EHR Learning for Outcome Prediction; ROC: receiver operating characteristic curve; SHAP: Shapley additive explanations.</p>
              </caption>
              <graphic xlink:href="ai_v5i1e88356_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Relative Importance of Different Classes of Inputs</title>
            <p>The SHAP values (<xref rid="figure5" ref-type="fig">Figure 5</xref>B) show that the prediction embedding has a smaller influence compared to laboratories and vitals, which contrasts with the readmission task. The ROC curve (<xref rid="figure5" ref-type="fig">Figure 5</xref>A) further illustrates that removing reasoning reduces AUROC from ~0.92 in the full model to ~0.88, a relative drop of about 4%. This indicates that mortality prediction is comparatively easier, as a strong AUROC is retained even without reasoning. The lower class imbalance and the strong signal contained in laboratory values and diagnostic codes allow the model to capture patterns associated with terminal illness more directly. Nevertheless, reasoning still contributes by improving discrimination at the margin, capturing subtler risk factors that are less apparent in structured features alone.</p>
          </sec>
          <sec>
            <title>Sample Inference Output From M1</title>
            <p>We show inference outputs for 2 patients on the mortality prediction task, generated by <italic>M</italic><sub>1</sub>, our fine-tuned LLM with reasoning, in Tables S10 and S11 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </sec>
          <sec>
            <title>Analysis of Distribution (In-Hospital Mortality)</title>
            <p><xref rid="figure6" ref-type="fig">Figure 6</xref>A shows predicted probabilities for in-hospital mortality, separated by true class: survivors (blue) and deceased (red). The dashed line marks the optimal threshold (0.276) balancing sensitivity and specificity.</p>
            <p>Most survivors cluster near zero probability, reflecting strong model confidence, while deceased cases spread across a wider range, showing prediction uncertainty. Overlap between classes causes some misclassifications, highlighting the challenge of predicting this rare event. Despite this, the clear separation and tight clustering of survivors demonstrate the model’s strong ability to distinguish between classes, supporting the usefulness of the selected threshold.</p>
            <fig id="figure6" position="float">
              <label>Figure 6</label>
              <caption>
                <p>Visualization of class separability and predicted probability distributions for the in-hospital mortality task. (A) Distribution of predicted probabilities for in-hospital mortality, separated by true class labels. The vertical dashed line marks the optimal threshold (0.276) balancing sensitivity and specificity. (B) T-distributed Stochastic Neighbor Embedding of the test dataset for the in-hospital mortality task, showing Class 0 (blue) and Class 1 (red).</p>
              </caption>
              <graphic xlink:href="ai_v5i1e88356_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Class Separability Analysis Using t-SNE (In-Hospital Mortality Task)</title>
            <p><xref rid="figure6" ref-type="fig">Figure 6</xref>B shows a 2-dimensional t-SNE embedding of the test data, where the samples are colored by their true class labels (Class 0 in blue and Class 1 in red). The visualization reveals that the majority of the data forms a consistent, structured manifold dominated by Class 0 points, with only a small fraction of Class 1 points distributed across the embedding. Notably, a compact cluster of Class 1 samples appears on the right-hand side, indicating localized patterns that can be exploited by advanced models. This structure suggests that although Class 1 is relatively sparse, it exhibits distinct feature signatures in specific regions, which the proposed KAMELEON-X model is designed to capture effectively, contributing to improved predictive performance.</p>
          </sec>
        </sec>
        <sec>
          <title>Incorporating Additional Patient Context in M1</title>
          <p>Our unstructured model, <italic>M</italic><sub>1</sub>, extends KARE [<xref ref-type="bibr" rid="ref15">15</xref>] by incorporating physician notes more explicitly into similar patients, enriching the KG; in contrast, KARE [<xref ref-type="bibr" rid="ref15">15</xref>] uses only structured EHR data related to drugs, procedures, and conditions as context for similar patients, and the KG is constructed without considering patient conditions. Our modification ensures that unstructured clinical narratives contribute to prediction alongside structured features, providing a stronger and more comparable baseline. Our strategy for adding physician notes leads to modest improvements in mortality and more substantial gains in readmission (<xref ref-type="table" rid="table3">Table 3</xref>). For mortality prediction, AUROC and AUPRC improve by ~2.3% and ~1.8%, respectively. For readmission, the gains are stronger, with AUROC improving by ~3.0% and AUPRC by ~2.1%. These results suggest that physician notes provide a useful complementary signal, especially for readmission, where unstructured narratives capture behavioral, discharge-related risk factors less visible in structured EHR data.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this work, we introduce KAMELEON, a novel framework that effectively integrates multimodal EHR data, including structured clinical features and unstructured physician notes, enhanced by knowledge-augmented LLM reasoning, for robust clinical risk prediction. Our 2-stage architecture demonstrated superior performance on both 30-day readmission and in-hospital mortality prediction, with respect to multiple metrics, including the AUROC score. KAMELEON outperforms all prior baselines, which only used one type of dataset (structured or unstructured), on most metrics for these 2 tasks, compared to prior work using the MIMIC-III dataset (<xref ref-type="table" rid="table3">Table 3</xref>). Multiple types of standard ML methods have been used for these tasks, yet KAMELEON demonstrates clear improvements across all evaluation metrics. None of the currently most powerful LLMs, including a medical LLM trained on clinical data, has comparable performance to KAMELEON. The only exception is that the LLaMA3-Med42-8B model achieves a higher AUPRC for the readmission task; however, KAMELEON significantly outperforms it across all other metrics.</p>
        <p>The relatively lower performance of the LLM baselines reflects known limitations of standard LLMs in clinical prediction tasks. In our setup, LLM baselines receive only physician notes and therefore lack access to structured EHR signals such as laboratory values, vitals, and coded diagnoses that are critical for accurate risk prediction. At the same time, structured-only baselines (eg, XGBoost) also demonstrate limited performance, indicating that structured signals alone are insufficient. These results highlight the complementary strengths of structured and unstructured modalities and motivate the multimodal fusion design of KAMELEON, which integrates both sources while grounding reasoning with external biomedical knowledge.</p>
        <p>We find that the reasoning component output by the LLM in <italic>M</italic><sub>1</sub>, which is used in <italic>M</italic><sub>2</sub> by constructing an embedding, has high predictive power in both tasks. For the 30-day readmission task, the embedding constructed using the reasoning output by <italic>M</italic><sub>1</sub> is very significant—removing this component causes the AUROC to drop from 84.4% to 68.7%. This effect is much smaller in the case of the mortality prediction task, but not negligible, dropping the AUROC from 0.92 to 0.88 when this component is dropped. This highlights the synergy achieved by combining these diverse modalities.</p>
        <p>This work underscores the significant potential of knowledge-augmented multimodal EHR modeling to enhance early intervention, optimize resource allocation, and improve patient care in complex clinical settings. While LLMs, including medical LLMs trained on specialized data, have a number of limitations in terms of accuracy and hallucinations, their reasoning outputs provide valuable predictive power.</p>
        <p>Future work will focus on further validating KAMELEON’s generalizability across diverse clinical settings and exploring its application to a wider range of predictive health care tasks. Our framework can be easily extended to other clinical prediction tasks, especially those for which structured models have already been developed. KAMELEON can be applied for such tasks without any changes, and we expect it will provide similar gains.</p>
      </sec>
      <sec>
        <title>Scope and Promise for Social Impact</title>
        <p>KAMELEON offers a strong opportunity to reduce avoidable hospital readmissions, a major driver of morbidity, cost, and financial penalties [<xref ref-type="bibr" rid="ref48">48</xref>-<xref ref-type="bibr" rid="ref51">51</xref>]. It provides real-time risk predictions for inpatients, enabling more effective discharge planning, case management, and postacute care. Our model uses real-time patient data to assess readmission risk prior to discharge, supporting individualized case management, discharge planning, census forecasting, and postacute care coordination. By identifying high-risk patients, our model enables focused use of limited resources, improving efficiency and outcomes.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Additional dataset details, framework explanation, result tables/plots.</p>
        <media xlink:href="ai_v5i1e88356_app1.docx" xlink:title="DOCX File , 1814 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUPRC</term>
          <def>
            <p>area under the precision-recall curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BalancedRF</term>
          <def>
            <p>balanced random forests</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ICD-9</term>
          <def>
            <p>International Classification of Diseases, Ninth Revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">KAMELEON</term>
          <def>
            <p>Knowledge-Augmented Multimodal EHR Learning for Outcome Prediction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">KARE</term>
          <def>
            <p>Knowledge Aware Reasoning-Enhanced HealthCare Prediction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">KG</term>
          <def>
            <p>knowledge graph</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">LightGBM</term>
          <def>
            <p>light gradient boosting machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">M1</term>
          <def>
            <p>unstructured model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">M2</term>
          <def>
            <p>structured model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">MIMIC</term>
          <def>
            <p>Medical Information Mart for Intensive Care</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">MLP</term>
          <def>
            <p>multilayer perceptron</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">NPV</term>
          <def>
            <p>negative predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">SHAP</term>
          <def>
            <p>Shapley additive explanations</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">t-SNE</term>
          <def>
            <p>t-distributed Stochastic Neighbor Embedding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">XGBoost</term>
          <def>
            <p>extreme gradient boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We thank our collaborators and colleagues for their valuable discussions throughout this work.</p>
    </ack>
    <notes>
      <sec>
        <title>Funding</title>
        <p>We acknowledge support from the University of Virginia Prominence-to-Preeminence (P2PE) STEM Targeted Initiatives Fund, SIF176A Contagion Science, NSF grants CCF-1918656 and CNS-2317193, CDC MIND U01CK000589, and NIGMS R24GM153920.</p>
      </sec>
    </notes>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The study uses the open-access MIMIC-III database, which contains deidentified electronic health records. The dataset is available through PhysioNet [<xref ref-type="bibr" rid="ref52">52</xref>] upon completing the required data use agreement and certification.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>All authors helped in the conceptualization of the project. AV, JC, RD, and ZG helped in developing the computational methodology, while JE, GRM, VR, and RS helped in interpreting the results and refining the methods. The overall framework was designed jointly by all authors. RD led the design and implementation of the complete framework and carried out experiments, with support from ZG and JC. All authors contributed to interpreting results and writing the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Perez-Concha</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Martin-Sanchez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Day</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Roffe</surname>
              <given-names>D</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Real-time prediction of mortality, readmission, and length of stay using electronic health record data</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <volume>23</volume>
          <issue>3</issue>
          <fpage>553</fpage>
          <lpage>561</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26374704"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv110</pub-id>
          <pub-id pub-id-type="medline">26374704</pub-id>
          <pub-id pub-id-type="pii">ocv110</pub-id>
          <pub-id pub-id-type="pmcid">PMC7839923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Using machine learning methods to predict in-hospital mortality of sepsis patients in the ICU</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>251</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-01271-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-020-01271-2</pub-id>
          <pub-id pub-id-type="medline">33008381</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-020-01271-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7531110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brajer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cozzi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nichols</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Revoir</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Balu</surname>
              <given-names>S</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Prospective and external evaluation of a machine learning model to predict in-hospital mortality of adults at time of admission</article-title>
          <source>JAMA Netw Open</source>
          <year>2020</year>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>e1920733</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2019.20733"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2019.20733</pub-id>
          <pub-id pub-id-type="medline">32031645</pub-id>
          <pub-id pub-id-type="pii">2760438</pub-id>
          <pub-id pub-id-type="pmcid">PMC12068827</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kansagara</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Englander</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Salanitro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kagen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Theobald</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>M</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Risk prediction models for hospital readmission: a systematic review</article-title>
          <source>JAMA</source>
          <year>2011</year>
          <volume>306</volume>
          <issue>15</issue>
          <fpage>1688</fpage>
          <lpage>1698</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22009101"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2011.1515</pub-id>
          <pub-id pub-id-type="medline">22009101</pub-id>
          <pub-id pub-id-type="pii">306/15/1688</pub-id>
          <pub-id pub-id-type="pmcid">PMC3603349</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mahmoudi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kamdar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzales</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Waljee</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Use of electronic medical records in development and validation of risk prediction models of hospital readmission: systematic review</article-title>
          <source>BMJ</source>
          <year>2020</year>
          <volume>369</volume>
          <fpage>m958</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=32269037"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.m958</pub-id>
          <pub-id pub-id-type="medline">32269037</pub-id>
          <pub-id pub-id-type="pmcid">PMC7249246</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Upadhyay</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stephenson</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>DG</given-names>
            </name>
          </person-group>
          <article-title>Readmission rates and their impact on hospital financial performance: a study of Washington hospitals</article-title>
          <source>Inquiry</source>
          <year>2019</year>
          <volume>56</volume>
          <fpage>46958019860386</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/0046958019860386?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0046958019860386</pub-id>
          <pub-id pub-id-type="medline">31282282</pub-id>
          <pub-id pub-id-type="pmcid">PMC6614936</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clement</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Kheir</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Derman</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Speck</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>LS</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Will Medicare readmission penalties motivate hospitals to reduce arthroplasty readmissions?</article-title>
          <source>J Arthroplasty</source>
          <year>2017</year>
          <volume>32</volume>
          <issue>3</issue>
          <fpage>709</fpage>
          <lpage>713</lpage>
          <pub-id pub-id-type="doi">10.1016/j.arth.2016.08.031</pub-id>
          <pub-id pub-id-type="medline">27712937</pub-id>
          <pub-id pub-id-type="pii">S0883-5403(16)30578-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brisimi</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>WG</given-names>
            </name>
            <name name-style="western">
              <surname>Paschalidis</surname>
              <given-names>IC</given-names>
            </name>
          </person-group>
          <article-title>Predicting chronic disease hospitalizations from electronic health records: an interpretable classification approach</article-title>
          <source>Proc IEEE</source>
          <year>2018</year>
          <volume>106</volume>
          <issue>4</issue>
          <fpage>690</fpage>
          <lpage>707</lpage>
          <pub-id pub-id-type="doi">10.1109/jproc.2017.2789319</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Herazo-Padilla</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Augusto</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Dalmas</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Bongue</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A decision-tree-based Bayesian approach for chance-constrained health prevention budget rationing</article-title>
          <source>IEEE Trans Automat Sci Eng</source>
          <year>2022</year>
          <volume>19</volume>
          <issue>3</issue>
          <fpage>2322</fpage>
          <lpage>2338</lpage>
          <pub-id pub-id-type="doi">10.1109/tase.2021.3069800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Heavey</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Madden</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Sifri</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Vullikanti</surname>
              <given-names>A</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Identifying and forecasting importation and asymptomatic spreaders of multi-drug resistant organisms in hospital settings</article-title>
          <source>NPJ Digit Med</source>
          <year>2025</year>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>147</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-025-01529-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-025-01529-x</pub-id>
          <pub-id pub-id-type="medline">40055525</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-025-01529-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC11889233</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arsalan</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Enhancing predictive healthcare using AI-driven early warning systems</article-title>
          <source>Proc AAAI Conf Artif Intell</source>
          <year>2025</year>
          <volume>39</volume>
          <issue>28</issue>
          <fpage>29564</fpage>
          <lpage>29566</lpage>
          <pub-id pub-id-type="doi">10.1609/aaai.v39i28.35326</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Smart: towards pre-trained missing-aware model for patient health status prediction</article-title>
          <source>Adv Neural Inf Process Syst</source>
          <year>2024</year>
          <volume>37</volume>
          <fpage>63986</fpage>
          <lpage>64009</lpage>
          <pub-id pub-id-type="doi">10.52202/079017-2043</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>MedLink: de-identified patient health record linkage</article-title>
          <year>2023</year>
          <conf-name>Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>2023 August 6-10</conf-date>
          <conf-loc>Long Beach, CA, USA</conf-loc>
          <fpage>2672</fpage>
          <lpage>2682</lpage>
          <pub-id pub-id-type="doi">10.1145/3580305.3599427</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Yinghao</given-names>
            </name>
          </person-group>
          <article-title>EMERGE: integrating RAG for improved multimodal EHR predictive modeling</article-title>
          <source>ResearchGate</source>
          <year>2024</year>
          <access-date>2024-05-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/profile/Zixiang-Wang-13/publication/381126633_EMERGE_Integrating_RAG_for_Improved_Multimodal_EHR_Predictive_Modeling/links/6668192da54c5f0b945da986/EMERGE-Integrating-RAG-for-Improved-Multimodal-EHR-Predictive-Modeling.pdf">https://www.researchgate.net/profile/Zixiang-Wang-13/publication/381126633_EMERGE_Integrating_RAG_for_Improved_Multimodal_EHR_Predictive_Modeling/links/6668192da54c5f0b945da986/EMERGE-Integrating-RAG-for-Improved-Multimodal-EHR-Predictive-Modeling.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Reasoning-enhanced healthcare predictions with knowledge graph community retrieval</article-title>
          <year>2025</year>
          <conf-name>Thirteenth International Conference on Learning Representations</conf-name>
          <conf-date>2025 April 24-28</conf-date>
          <conf-loc>Singapore</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Erzurumluoglu</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Braenne</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Whitehurst</surname>
              <given-names>C</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Deep representation learning for clustering longitudinal survival data from electronic health records</article-title>
          <source>Nat Commun</source>
          <year>2025</year>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>2534</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-025-56625-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-025-56625-z</pub-id>
          <pub-id pub-id-type="medline">40087274</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-025-56625-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC11909183</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Mitra</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Berlowitz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>TransformEHR: transformer-based encoder-decoder generative model to enhance prediction of disease outcomes using electronic health records</article-title>
          <source>Nat Commun</source>
          <year>2023</year>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>7857</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-023-43715-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-023-43715-z</pub-id>
          <pub-id pub-id-type="medline">38030638</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-023-43715-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC10687211</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bellot</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>van der Schaar</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>A Bayesian approach to modelling longitudinal data in electronic health records</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on December 19, 2019</comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Hospital unexpected readmission using multi-model prediction</article-title>
          <year>2021</year>
          <conf-name>ICMLCA 2021 2nd International Conference on Machine Learning and Computer Application</conf-name>
          <conf-date>2021 December 17-19</conf-date>
          <conf-loc>Shenyang, China</conf-loc>
          <fpage>1</fpage>
          <lpage>5</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Osorio</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sy</surname>
              <given-names>LW</given-names>
            </name>
          </person-group>
          <article-title>An empirical evaluation of deep learning for ICD-9 code assignment using MIMIC-III clinical notes</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2019</year>
          <volume>177</volume>
          <fpage>141</fpage>
          <lpage>153</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.cmpb.2019.05.024"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2019.05.024</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>EHRAgent: code empowers large language models for few-shot complex tabular reasoning on electronic health records</article-title>
          <year>2024</year>
          <conf-name>Proceedings of the Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>2024 November 4-9</conf-date>
          <conf-loc>Miami, Florida, United States</conf-loc>
          <fpage>22315</fpage>
          <pub-id pub-id-type="doi">10.18653/v1/2024.emnlp-main.1245</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A scoping review of large language models in clinical research and healthcare delivery</article-title>
          <source>NPJ Digit Med</source>
          <year>2024</year>
          <pub-id pub-id-type="medline">40776319</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singhal</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Azizi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mahdavi</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>HW</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Large language models encode clinical knowledge</article-title>
          <source>Nature</source>
          <year>2023</year>
          <volume>620</volume>
          <issue>7972</issue>
          <fpage>172</fpage>
          <lpage>180</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37438534"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41586-023-06291-2</pub-id>
          <pub-id pub-id-type="medline">37438534</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41586-023-06291-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC10396962</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>MedRetriever: target-driven interpretable health risk prediction via retrieving unstructured medical text</article-title>
          <year>2021</year>
          <conf-name>Proceedings of the 30th ACM International Conference on Information &#38; Knowledge Management</conf-name>
          <conf-date>2021 November 1-5</conf-date>
          <conf-loc>Virtual Event, Queensland, Australia</conf-loc>
          <fpage>2414</fpage>
          <lpage>2423</lpage>
          <pub-id pub-id-type="doi">10.1145/3459637.3482273</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cross</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>GraphCare: enhancing healthcare predictions with personalized knowledge graphs</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 22, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2305.12788"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Martins</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Srikumar</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RAM-EHR: retrieval augmentation meets clinical predictions on electronic health records</article-title>
          <year>2024</year>
          <conf-name>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</conf-name>
          <conf-date>2024 August 11-16</conf-date>
          <conf-loc>Bangkok, Thailand</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <pub-id pub-id-type="doi">10.18653/v1/2024.acl-short.68</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>EHR-knowGen: knowledge-enhanced multimodal learning for disease diagnosis generation</article-title>
          <source>Inf Fusion</source>
          <year>2024</year>
          <volume>102</volume>
          <fpage>102069</fpage>
          <pub-id pub-id-type="doi">10.1016/j.inffus.2023.102069</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The unified medical language system (UMLS): integrating biomedical terminology</article-title>
          <source>Nucleic Acids Res</source>
          <year>2004</year>
          <volume>32</volume>
          <issue>90001</issue>
          <fpage>D267</fpage>
          <lpage>D270</lpage>
          <pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bopche</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gustad</surname>
              <given-names>LT</given-names>
            </name>
            <name name-style="western">
              <surname>Afset</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Ehrnström</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Damås</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Nytrø</surname>
              <given-names>Ø</given-names>
            </name>
          </person-group>
          <article-title>In-hospital mortality, readmission, and prolonged length of stay risk prediction leveraging historical electronic patient records</article-title>
          <source>JAMIA Open</source>
          <year>2024</year>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>ooae074</fpage>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooae074</pub-id>
          <pub-id pub-id-type="medline">39282081</pub-id>
          <pub-id pub-id-type="pii">ooae074</pub-id>
          <pub-id pub-id-type="pmcid">PMC11401612</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Traag</surname>
              <given-names>VA</given-names>
            </name>
            <name name-style="western">
              <surname>Waltman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>van Eck</surname>
              <given-names>NJ</given-names>
            </name>
          </person-group>
          <article-title>From Louvain to Leiden: guaranteeing well-connected communities</article-title>
          <source>Sci Rep</source>
          <year>2019</year>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>5233</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-019-41695-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-019-41695-z</pub-id>
          <pub-id pub-id-type="medline">30914743</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-019-41695-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC6435756</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reimers</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Sentence-BERT: sentence embeddings using siamese BERT-networks</article-title>
          <year>2019</year>
          <conf-name>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name>
          <conf-date>2019 November 3-7</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>3982</fpage>
          <lpage>3992</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D19-1410/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1410</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Douze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Guzhva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Szilvasy</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mazaré</surname>
              <given-names>P</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>The Faiss library</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on January 16, 2024</comment>
          <pub-id pub-id-type="doi">10.1109/tbdata.2025.3618474</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Touvron</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>LLaMA 3: open foundation and instruction-tuned large language models</source>
          <year>2024</year>
          <access-date>2026-04-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2407.21783">https://arxiv.org/abs/2407.21783</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>M</given-names>
            </name>
            <collab>Unsloth team</collab>
          </person-group>
          <article-title>Unsloth</article-title>
          <source>GitHub</source>
          <year>2023</year>
          <access-date>2026-04-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://github.com/unslothai/">http://github.com/unslothai/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wallis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Allen-Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>LoRA: low-rank adaptation of large language models</article-title>
          <source>arXiv</source>
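          <comment>Preprint posted online on June 17, 2021</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2106.09685"/>
          </comment>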
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Efficient estimation of word representations in vector space</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on January 16, 2013</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1301.3781"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Kegelmeyer</surname>
              <given-names>WP</given-names>
            </name>
          </person-group>
          <article-title>SMOTE: synthetic minority over-sampling technique</article-title>
          <source>J Artif Intell Res</source>
          <year>2002</year>
          <volume>16</volume>
          <fpage>321</fpage>
          <lpage>357</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/sdata.2016.35"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Anthropic</collab>
          </person-group>
          <article-title>Introducing Claude 3.5 Sonnet</article-title>
          <source>Anthropic</source>
          <year>2025</year>
          <access-date>2026-04-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.anthropic.com/">https://www.anthropic.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sellergren</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kazemzadeh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jaroensri</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kiraly</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Traverse</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kohlberger</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>MedGemma technical report</article-title>
          <source>arXiv</source>
          <year>2025</year>
          <access-date>2026-06-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/html/2507.05201v2">https://arxiv.org/html/2507.05201v2</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Christophe</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kanithi</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Raha</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pimentel</surname>
              <given-names>MAF</given-names>
            </name>
          </person-group>
          <article-title>Med42-v2: a suite of clinical LLMs</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on August 12, 2024</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2408.06142"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harutyunyan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Khachatrian</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Ver Steeg</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Galstyan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Multitask learning and benchmarking with clinical time series data</article-title>
          <source>Sci Data</source>
          <year>2019</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>96</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41597-019-0103-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41597-019-0103-9</pub-id>
          <pub-id pub-id-type="medline">31209213</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41597-019-0103-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC6572845</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lundberg</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Erion</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>DeGrave</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Prutkin</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>B</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>From local explanations to global understanding with explainable AI for trees</article-title>
          <source>Nat Mach Intell</source>
          <year>2020</year>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>56</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32607472"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s42256-019-0138-9</pub-id>
          <pub-id pub-id-type="medline">32607472</pub-id>
          <pub-id pub-id-type="pmcid">PMC7326367</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Purushotham</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Che</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Benchmarking deep learning models on large healthcare datasets</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <volume>83</volume>
          <fpage>112</fpage>
          <lpage>134</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30071-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.04.007</pub-id>
          <pub-id pub-id-type="medline">29879470</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30071-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bame</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zimand</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dooley</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Thom</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>AD</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Assessment of machine learning vs standard prediction rules for predicting hospital readmissions</article-title>
          <source>JAMA Netw Open</source>
          <year>2019</year>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>e190348</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30848808"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2019.0348</pub-id>
          <pub-id pub-id-type="medline">30848808</pub-id>
          <pub-id pub-id-type="pii">2727268</pub-id>
          <pub-id pub-id-type="pmcid">PMC6484642</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dhalluin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bannay</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lemordant</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sylvestre</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chazard</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cuggia</surname>
              <given-names>M</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Comparison of unplanned 30-day readmission prediction models, based on hospital warehouse and demographic data</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2020</year>
          <volume>270</volume>
          <fpage>547</fpage>
          <lpage>551</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI200220</pub-id>
          <pub-id pub-id-type="medline">32570443</pub-id>
          <pub-id pub-id-type="pii">SHTI200220</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Matheny</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Ricket</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Goodrich</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>RU</given-names>
            </name>
            <name name-style="western">
              <surname>Stabler</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Perkins</surname>
              <given-names>AM</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Development of electronic health record-based prediction models for 30-day readmission risk among patients hospitalized for acute myocardial infarction</article-title>
          <source>JAMA Netw Open</source>
          <year>2021</year>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>e2035782</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33512518"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.35782</pub-id>
          <pub-id pub-id-type="medline">33512518</pub-id>
          <pub-id pub-id-type="pii">2775730</pub-id>
          <pub-id pub-id-type="pmcid">PMC7846941</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fry</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Fluck</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>TS</given-names>
            </name>
          </person-group>
          <article-title>Frequent identical admission-readmission episodes are associated with increased mortality</article-title>
          <source>Clin Med (Lond)</source>
          <year>2021</year>
          <volume>21</volume>
          <issue>4</issue>
          <fpage>e351</fpage>
          <lpage>e356</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1470-2118(24)03095-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.7861/clinmed.2020-0930</pub-id>
          <pub-id pub-id-type="medline">35192477</pub-id>
          <pub-id pub-id-type="pii">S1470-2118(24)03095-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC8313203</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Joynt</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Jha</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>A path forward on Medicare readmissions</article-title>
          <source>N Engl J Med</source>
          <year>2013</year>
          <volume>368</volume>
          <issue>13</issue>
          <fpage>1175</fpage>
          <lpage>1177</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp1300122</pub-id>
          <pub-id pub-id-type="medline">23465069</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Panagiotou</surname>
              <given-names>OA</given-names>
            </name>
            <name name-style="western">
              <surname>Voorhies</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Keohane</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Adhikari</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>A</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Association of inclusion of Medicare Advantage patients in hospitals' risk-standardized readmission rates, performance, and penalty status</article-title>
          <source>JAMA Netw Open</source>
          <year>2021</year>
          <volume>4</volume>
          <issue>2</issue>
          <fpage>e2037320</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33595661"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.37320</pub-id>
          <pub-id pub-id-type="medline">33595661</pub-id>
          <pub-id pub-id-type="pii">2776528</pub-id>
          <pub-id pub-id-type="pmcid">PMC7890527</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zuckerman</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Joynt Maddox</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Sheingold</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Epstein</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Effect of a hospital-wide measure on the readmissions reduction program</article-title>
          <source>N Engl J Med</source>
          <year>2017</year>
          <volume>377</volume>
          <issue>16</issue>
          <fpage>1551</fpage>
          <lpage>1558</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMsa1701791</pub-id>
          <pub-id pub-id-type="medline">29045205</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <source>PhysioNet</source>
          <access-date>2026-05-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://physionet.org/">https://physionet.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
